Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  *  linux/mm/oom_kill.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  * 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  *  Copyright (C)  1998,2000  Rik van Riel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *	Thanks go out to Claus Fischer for some serious inspiration and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  *	for goading me into coding this file...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  *  Copyright (C)  2010  Google, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  *	Rewritten by David Rientjes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  *  The routines in this file are used to kill a process when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  *  we're seriously out of memory. This gets called from __alloc_pages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  *  in mm/page_alloc.c when we really run out of memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  *  Since we won't call these routines often (on a well-configured
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  *  machine) this file will double as a 'coding guide' and a signpost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  *  for newbie kernel hackers. It features several pointers to major
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  *  kernel subsystems and hints as to where to find out what things do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/oom.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/err.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <linux/sched/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) #include <linux/sched/coredump.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <linux/sched/task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <linux/sched/debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <linux/swap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) #include <linux/timex.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include <linux/jiffies.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #include <linux/cpuset.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) #include <linux/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) #include <linux/notifier.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #include <linux/memcontrol.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include <linux/mempolicy.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) #include <linux/security.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #include <linux/ptrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #include <linux/freezer.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) #include <linux/ftrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #include <linux/ratelimit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) #include <linux/init.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) #include <linux/mmu_notifier.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) #include <asm/tlb.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) #include "internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) #include "slab.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) #include <trace/events/oom.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) #undef CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) #include <trace/hooks/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) int sysctl_panic_on_oom;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) int sysctl_oom_kill_allocating_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) int sysctl_oom_dump_tasks = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63)  * Serializes oom killer invocations (out_of_memory()) from all contexts to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64)  * prevent overly eager oom killing (e.g. when the oom killer is invoked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65)  * from different domains).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67)  * oom_killer_disable() relies on this lock to stabilize oom_killer_disabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68)  * and mark_oom_victim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) DEFINE_MUTEX(oom_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) /* Serializes oom_score_adj and oom_score_adj_min updates */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) DEFINE_MUTEX(oom_adj_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) static inline bool is_memcg_oom(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) 	return oc->memcg != NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) #ifdef CONFIG_NUMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81)  * oom_cpuset_eligible() - check task eligibility for kill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82)  * @start: task struct of the task to consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83)  * @oc: pointer to struct oom_control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85)  * Task eligibility is determined by whether a candidate thread of @start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86)  * shares the same mempolicy nodes as current if it is bound by such a policy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87)  * and whether or not it has the same set of allowed cpuset nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89)  * This function is assuming oom-killer context and 'current' has triggered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90)  * the oom-killer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) static bool oom_cpuset_eligible(struct task_struct *start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 				struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 	struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 	bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 	const nodemask_t *mask = oc->nodemask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 	if (is_memcg_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 	for_each_thread(start, tsk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 		if (mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 			 * If this is a mempolicy constrained oom, tsk's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 			 * cpuset is irrelevant.  Only return true if its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 			 * mempolicy intersects current, otherwise it may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 			 * needlessly killed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 			ret = mempolicy_nodemask_intersects(tsk, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 			 * This is not a mempolicy constrained oom, so only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 			 * check the mems of tsk's cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 			ret = cpuset_mems_allowed_intersects(current, tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) static bool oom_cpuset_eligible(struct task_struct *tsk, struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) #endif /* CONFIG_NUMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134)  * The process p may have detached its own ->mm while exiting or through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135)  * kthread_use_mm(), but one or more of its subthreads may still have a valid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136)  * pointer.  Return p, or any of its subthreads with a valid ->mm, with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137)  * task_lock() held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) struct task_struct *find_lock_task_mm(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	struct task_struct *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 	for_each_thread(p, t) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 		task_lock(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 		if (likely(t->mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 			goto found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 		task_unlock(t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	t = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) found:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	return t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159)  * order == -1 means the oom kill was requested via sysrq; otherwise the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160)  * order is only used for display purposes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) static inline bool is_sysrq_oom(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 	return oc->order == -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) /* return true if the task is not adequate as candidate victim task. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) static bool oom_unkillable_task(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	if (is_global_init(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	if (p->flags & PF_KTHREAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178)  * Print out unreclaimable slab info when the amount of unreclaimable slab
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179)  * memory is greater than all user memory (LRU pages).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) static bool is_dump_unreclaim_slabs(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	unsigned long nr_lru;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 		 global_node_page_state(NR_INACTIVE_ANON) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 		 global_node_page_state(NR_ACTIVE_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 		 global_node_page_state(NR_INACTIVE_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 		 global_node_page_state(NR_ISOLATED_ANON) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 		 global_node_page_state(NR_ISOLATED_FILE) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 		 global_node_page_state(NR_UNEVICTABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 	return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197)  * oom_badness - heuristic function to determine which candidate task to kill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198)  * @p: task struct of the task whose badness we should calculate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199)  * @totalpages: total present RAM allowed for page allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201)  * The heuristic for determining which task to kill is made to be as simple and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202)  * predictable as possible.  The goal is to return the highest value for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203)  * task consuming the most memory to avoid subsequent oom failures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) long oom_badness(struct task_struct *p, unsigned long totalpages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 	long points;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	long adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 	if (oom_unkillable_task(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 		return LONG_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	p = find_lock_task_mm(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 	if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 		return LONG_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	 * Do not even consider tasks which are explicitly marked oom
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	 * unkillable, have already been oom reaped, or are in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 	 * the middle of vfork
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 	adj = (long)p->signal->oom_score_adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	if (adj == OOM_SCORE_ADJ_MIN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 			test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 			in_vfork(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 		task_unlock(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 		return LONG_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 	 * The baseline for the badness score is the proportion of RAM that each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 	 * task's rss, pagetable and swap space use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 		mm_pgtables_bytes(p->mm) / PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	task_unlock(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	/* Normalize to oom_score_adj units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	adj *= totalpages / 1000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	points += adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	return points;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) static const char * const oom_constraint_text[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	[CONSTRAINT_NONE] = "CONSTRAINT_NONE",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	[CONSTRAINT_CPUSET] = "CONSTRAINT_CPUSET",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	[CONSTRAINT_MEMORY_POLICY] = "CONSTRAINT_MEMORY_POLICY",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	[CONSTRAINT_MEMCG] = "CONSTRAINT_MEMCG",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253)  * Determine the type of allocation constraint.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) static enum oom_constraint constrained_alloc(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 	struct zone *zone;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	struct zoneref *z;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	enum zone_type highest_zoneidx = gfp_zone(oc->gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 	bool cpuset_limited = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 	int nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 	if (is_memcg_oom(oc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 		oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 		return CONSTRAINT_MEMCG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	/* Default to all available memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 	oc->totalpages = totalram_pages() + total_swap_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 	if (!IS_ENABLED(CONFIG_NUMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 		return CONSTRAINT_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 	if (!oc->zonelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 		return CONSTRAINT_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	 * We only reach here when __GFP_NOFAIL is used, so we should avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	 * killing current; a random task is killed in this case instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	 * Ideally this would be CONSTRAINT_THISNODE, but that cannot be handled yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	if (oc->gfp_mask & __GFP_THISNODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 		return CONSTRAINT_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	 * the page allocator means a mempolicy is in effect.  Cpuset policy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 	 * is enforced in get_page_from_freelist().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	if (oc->nodemask &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	    !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 		oc->totalpages = total_swap_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 		for_each_node_mask(nid, *oc->nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 			oc->totalpages += node_present_pages(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 		return CONSTRAINT_MEMORY_POLICY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	/* Check this allocation failure is caused by cpuset's wall function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 			highest_zoneidx, oc->nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 		if (!cpuset_zone_allowed(zone, oc->gfp_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 			cpuset_limited = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 	if (cpuset_limited) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 		oc->totalpages = total_swap_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 		for_each_node_mask(nid, cpuset_current_mems_allowed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 			oc->totalpages += node_present_pages(nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 		return CONSTRAINT_CPUSET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	return CONSTRAINT_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) static int oom_evaluate_task(struct task_struct *task, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 	struct oom_control *oc = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	long points;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 	if (oom_unkillable_task(task))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 		goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	/* p may not have freeable memory in nodemask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	if (!is_memcg_oom(oc) && !oom_cpuset_eligible(task, oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 		goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 	 * This task already has access to memory reserves and is being killed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	 * Don't allow any other task to have access to the reserves unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 	 * the task has MMF_OOM_SKIP because chances that it would release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 	 * any memory is quite low.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 		if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 			goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 		goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	 * If task is allocating a lot of memory and has been marked to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	 * killed first if it triggers an oom, then select it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	if (oom_task_origin(task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 		points = LONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 		goto select;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	points = oom_badness(task, oc->totalpages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 	if (points == LONG_MIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 	 * Check to see if this is the worst task with a non-negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	 * ADJ score seen so far
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 	if (task->signal->oom_score_adj >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 	    points > oc->chosen_non_negative_adj_points) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 		if (oc->chosen_non_negative_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 			put_task_struct(oc->chosen_non_negative_adj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 		get_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		oc->chosen_non_negative_adj = task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		oc->chosen_non_negative_adj_points = points;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 	if (points < oc->chosen_points)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 		goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) select:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	if (oc->chosen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 		put_task_struct(oc->chosen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	get_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 	oc->chosen = task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 	oc->chosen_points = points;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	if (oc->chosen_non_negative_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 		put_task_struct(oc->chosen_non_negative_adj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 	if (oc->chosen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 		put_task_struct(oc->chosen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 	oc->chosen_non_negative_adj = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	oc->chosen = (void *)-1UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385)  * Simple selection loop. We choose the process with the highest number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386)  * 'points'. In case scan was aborted, oc->chosen is set to -1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) static void select_bad_process(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	oc->chosen_points = LONG_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	oc->chosen_non_negative_adj_points = LONG_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 	oc->chosen_non_negative_adj = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	if (is_memcg_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 		struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 		for_each_process(p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 			if (oom_evaluate_task(p, oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 	if (oc->chosen_non_negative_adj) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 		 * If oc->chosen has a negative ADJ, and we found a task with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 		 * a positive ADJ to kill, kill the task with the positive ADJ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 		 * instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 		if (oc->chosen && oc->chosen->signal->oom_score_adj < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 			put_task_struct(oc->chosen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 			oc->chosen = oc->chosen_non_negative_adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 			oc->chosen_points = oc->chosen_non_negative_adj_points;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 			put_task_struct(oc->chosen_non_negative_adj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) static int dump_task(struct task_struct *p, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	struct oom_control *oc = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	if (oom_unkillable_task(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 	/* p may not have freeable memory in nodemask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 	task = find_lock_task_mm(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	if (!task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 		 * This is a kthread or all of p's threads have already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 		 * detached their mm's.  There's no need to report
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 		 * them; they can't be oom killed anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 		task->pid, from_kuid(&init_user_ns, task_uid(task)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 		task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 		mm_pgtables_bytes(task->mm),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 		get_mm_counter(task->mm, MM_SWAPENTS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 		task->signal->oom_score_adj, task->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	task_unlock(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455)  * dump_tasks - dump current memory state of all system tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456)  * @oc: pointer to struct oom_control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458)  * Dumps the current memory state of all eligible tasks.  Tasks not in the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459)  * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460)  * are not shown.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461)  * State information includes task's pid, uid, tgid, vm size, rss,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462)  * pgtables_bytes, swapents, oom_score_adj value, and name.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) static void dump_tasks(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	pr_info("Tasks state (memory values in pages):\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 	if (is_memcg_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 		mem_cgroup_scan_tasks(oc->memcg, dump_task, oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 		struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 		for_each_process(p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 			dump_task(p, oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	/* one line summary of the oom killer context. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 	pr_info("oom-kill:constraint=%s,nodemask=%*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 			oom_constraint_text[oc->constraint],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 			nodemask_pr_args(oc->nodemask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 	cpuset_print_current_mems_allowed();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 	mem_cgroup_print_oom_context(oc->memcg, victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 	pr_cont(",task=%s,pid=%d,uid=%d\n", victim->comm, victim->pid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 		from_kuid(&init_user_ns, task_uid(victim)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) static void dump_header(struct oom_control *oc, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 	pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 		current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 			current->signal->oom_score_adj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 		pr_warn("COMPACTION is disabled!!!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 	if (is_memcg_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 		mem_cgroup_print_oom_meminfo(oc->memcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 		show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 		if (is_dump_unreclaim_slabs())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 			dump_unreclaimable_slab();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	if (sysctl_oom_dump_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 		dump_tasks(oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	if (p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 		dump_oom_summary(oc, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516)  * Number of OOM victims in flight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) static atomic_t oom_victims = ATOMIC_INIT(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) static bool oom_killer_disabled __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) #define K(x) ((x) << (PAGE_SHIFT-10))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526)  * task->mm can be NULL if the task is the exited group leader.  So to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527)  * determine whether the task is using a particular mm, we examine all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528)  * task's threads: if one of those is using this mm then this task was also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529)  * using it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	struct task_struct *t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 	for_each_thread(p, t) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 		struct mm_struct *t_mm = READ_ONCE(t->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 		if (t_mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 			return t_mm == mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545)  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546)  * victim (if that is possible) to help the OOM killer to move on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) static struct task_struct *oom_reaper_th;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) static struct task_struct *oom_reaper_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) static DEFINE_SPINLOCK(oom_reaper_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) bool __oom_reap_task_mm(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 	struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 	bool ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 	 * Tell all users of get_user/copy_from_user etc... that the content
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 	 * is no longer stable. No barriers really needed because unmapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	 * should imply barriers already and the reader would hit a page fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 	 * if it stumbled over reaped memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	set_bit(MMF_UNSTABLE, &mm->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 		if (!can_madv_lru_vma(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 		 * Only anonymous pages have a good chance to be dropped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 		 * without additional steps which we cannot afford as we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 		 * are OOM already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 		 * We do not even care about fs backed pages because all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 		 * which are reclaimable have already been reclaimed and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 		 * we do not want to block exit_mmap by keeping mm ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 		 * count elevated without a good reason.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 			struct mmu_notifier_range range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 			struct mmu_gather tlb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 			mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 						vma, mm, vma->vm_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 						vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 			tlb_gather_mmu(&tlb, mm, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 			if (mmu_notifier_invalidate_range_start_nonblock(&range)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 				tlb_finish_mmu(&tlb, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 				ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 			unmap_page_range(&tlb, vma, range.start, range.end, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 			mmu_notifier_invalidate_range_end(&range);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 			tlb_finish_mmu(&tlb, range.start, range.end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603)  * Reaps the address space of the given task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605)  * Returns true on success and false if only part (or none) of the address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606)  * space could be reclaimed, in which case the caller should retry later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	bool ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	if (!mmap_read_trylock(mm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 		trace_skip_task_reaping(tsk->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	 * under mmap_lock for reading because it serializes against the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	 * mmap_write_lock();mmap_write_unlock() cycle in exit_mmap().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 		trace_skip_task_reaping(tsk->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	trace_start_task_reaping(tsk->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	/* failed to reap part of the address space. Try again later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	ret = __oom_reap_task_mm(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 		goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 			task_pid_nr(tsk), tsk->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 			K(get_mm_counter(mm, MM_ANONPAGES)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 			K(get_mm_counter(mm, MM_FILEPAGES)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 			K(get_mm_counter(mm, MM_SHMEMPAGES)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	trace_finish_task_reaping(tsk->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) #define MAX_OOM_REAP_RETRIES 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) static void oom_reap_task(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 	int attempts = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	struct mm_struct *mm = tsk->signal->oom_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	/* Retry the mmap_read_trylock(mm) a few times */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 		schedule_timeout_idle(HZ/10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	if (attempts <= MAX_OOM_REAP_RETRIES ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	    test_bit(MMF_OOM_SKIP, &mm->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 		task_pid_nr(tsk), tsk->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 	sched_show_task(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	debug_show_all_locks();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 	tsk->oom_reaper_list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 	 * Hide this mm from the OOM killer because it has either been reaped or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	 * a lock holder is unable to call mmap_write_unlock(mm).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 	set_bit(MMF_OOM_SKIP, &mm->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	/* Drop a reference taken by wake_oom_reaper */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	put_task_struct(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) static int oom_reaper(void *unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 		struct task_struct *tsk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		spin_lock(&oom_reaper_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 		if (oom_reaper_list != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 			tsk = oom_reaper_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 			oom_reaper_list = tsk->oom_reaper_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 		spin_unlock(&oom_reaper_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 		if (tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 			oom_reap_task(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) static void wake_oom_reaper(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	/* mm is already queued? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	get_task_struct(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	spin_lock(&oom_reaper_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	tsk->oom_reaper_list = oom_reaper_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	oom_reaper_list = tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 	spin_unlock(&oom_reaper_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 	trace_wake_reaper(tsk->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 	wake_up(&oom_reaper_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) static int __init oom_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) subsys_initcall(oom_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) static inline void wake_oom_reaper(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) #endif /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729)  * tsk->mm has to be non-NULL and the caller has to guarantee it is stable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730)  * (either under task_lock or by operating on current).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) static void __mark_oom_victim(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	struct mm_struct *mm = tsk->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 		mmgrab(tsk->signal->oom_mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 		set_bit(MMF_OOM_VICTIM, &mm->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743)  * mark_oom_victim - mark the given task as OOM victim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744)  * @tsk: task to mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746)  * Has to be called with oom_lock held and never after the OOM killer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747)  * has been disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749)  * tsk->mm has to be non-NULL and the caller has to guarantee it is stable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750)  * (either under task_lock or by operating on current).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) static void mark_oom_victim(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	WARN_ON(oom_killer_disabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	/* OOM killer might race with memcg OOM */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	/* oom_mm is bound to the signal struct life time. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	__mark_oom_victim(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	 * Make sure that the task is woken up from uninterruptible sleep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	 * if it is frozen, because otherwise the OOM killer wouldn't be able
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	 * to free any memory and would livelock. freezing_slow_path will tell
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	 * the freezer that TIF_MEMDIE tasks should be ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	__thaw_task(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	atomic_inc(&oom_victims);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 	trace_mark_victim(tsk->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774)  * exit_oom_victim - note the exit of an OOM victim
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) void exit_oom_victim(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	clear_thread_flag(TIF_MEMDIE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	if (!atomic_dec_return(&oom_victims))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 		wake_up_all(&oom_victims_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785)  * oom_killer_enable - enable OOM killer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) void oom_killer_enable(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	oom_killer_disabled = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	pr_info("OOM killer enabled.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794)  * oom_killer_disable - disable OOM killer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795)  * @timeout: maximum timeout to wait for oom victims in jiffies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797)  * Forces all page allocations to fail rather than trigger OOM killer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798)  * Will block and wait until all OOM victims are killed or the given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799)  * timeout expires.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801)  * The function cannot be called when there are runnable user tasks, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802)  * userspace would see unexpected allocation failures as a result. Any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803)  * new use of this function should be discussed with the MM maintainers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805)  * Returns true if successful and false if the OOM killer cannot be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806)  * disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) bool oom_killer_disable(signed long timeout)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	signed long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	 * Make sure to not race with an ongoing OOM killer. Check that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	 * current is not killed (possibly due to sharing the victim's memory).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	if (mutex_lock_killable(&oom_lock))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	oom_killer_disabled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	mutex_unlock(&oom_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	ret = wait_event_interruptible_timeout(oom_victims_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 			!atomic_read(&oom_victims), timeout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		oom_killer_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	pr_info("OOM killer disabled.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) }
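
/*
 * A minimal, hedged sketch (not part of this file) of how a suspend-style
 * caller might pair oom_killer_disable() with oom_killer_enable().  The
 * function name and the 20 second timeout are illustrative assumptions.
 */
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/oom.h>

static int example_enter_quiescent_state(void)
{
	/* Fail further allocations instead of OOM-killing while quiesced. */
	if (!oom_killer_disable(msecs_to_jiffies(20000)))
		return -EBUSY;	/* OOM victims did not exit within the timeout */

	/* ... work that must not race with the OOM killer ... */

	/* Re-enable the OOM killer on the way out. */
	oom_killer_enable();
	return 0;
}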
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) static inline bool __task_will_free_mem(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	struct signal_struct *sig = task->signal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	 * A coredumping process may sleep for an extended period in exit_mm(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 	 * so the oom killer cannot assume that the process will promptly exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	 * and release memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	if (sig->flags & SIGNAL_GROUP_COREDUMP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	if (sig->flags & SIGNAL_GROUP_EXIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	if (thread_group_empty(task) && (task->flags & PF_EXITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854)  * Checks whether the given task is dying or exiting and likely to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855)  * release its address space. This means that all threads and processes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856)  * sharing the same mm have to be killed or exiting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857)  * The caller has to make sure that task->mm is stable (by holding task_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858)  * or by operating on current).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) static bool task_will_free_mem(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	struct mm_struct *mm = task->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	bool ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	 * Skip tasks without an mm: they might have already passed exit_mm()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	 * and exit_oom_victim(). The oom_reaper could have rescued that, but do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	 * not rely on it for now; find_lock_task_mm() is a possible future improvement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	if (!mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	if (!__task_will_free_mem(task))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	 * This task has already been drained by the oom reaper, so there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	 * only a small chance it will free any more memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	if (test_bit(MMF_OOM_SKIP, &mm->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	if (atomic_read(&mm->mm_users) <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	 * Make sure that all tasks which share the mm with the given task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	 * are dying as well, so that a) nobody pins its mm and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	 * b) the task is also reapable by the oom reaper.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	for_each_process(p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		if (!process_shares_mm(p, mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		if (same_thread_group(task, p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		ret = __task_will_free_mem(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 		if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) static void __oom_kill_process(struct task_struct *victim, const char *message)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	bool can_oom_reap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	p = find_lock_task_mm(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	if (!p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		pr_info("%s: OOM victim %d (%s) is already exiting. Skip killing the task\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 			message, task_pid_nr(victim), victim->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		put_task_struct(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	} else if (victim != p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 		get_task_struct(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		put_task_struct(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 		victim = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	/* Get a reference to safely compare mm after task_unlock(victim) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	mm = victim->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	mmgrab(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	/* Raise event before sending signal: task reaper must see this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	count_vm_event(OOM_KILL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	 * We should send SIGKILL before granting access to memory reserves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	 * in order to prevent the OOM victim from depleting the memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	 * reserves from user space under its control.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	mark_oom_victim(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u pgtables:%lukB oom_score_adj:%hd\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 		message, task_pid_nr(victim), victim->comm, K(mm->total_vm),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 		K(get_mm_counter(mm, MM_ANONPAGES)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 		K(get_mm_counter(mm, MM_FILEPAGES)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 		K(get_mm_counter(mm, MM_SHMEMPAGES)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		from_kuid(&init_user_ns, task_uid(victim)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	task_unlock(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	 * Kill all user processes sharing victim->mm in other thread groups, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	 * any.  They don't get access to memory reserves, though, to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	 * depletion of all memory.  This prevents mm->mmap_lock livelock when an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	 * oom-killed thread cannot exit because it needs the mmap_lock and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	 * lock is contended by another thread trying to allocate memory itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	 * That thread will now get access to memory reserves since it has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	 * pending fatal signal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	for_each_process(p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 		if (!process_shares_mm(p, mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		if (same_thread_group(p, victim))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 		if (is_global_init(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 			can_oom_reap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 			set_bit(MMF_OOM_SKIP, &mm->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 					task_pid_nr(victim), victim->comm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 					task_pid_nr(p), p->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		 * No kthread_use_mm() user needs to read from userspace, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		 * we are OK to reap it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 		if (unlikely(p->flags & PF_KTHREAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	if (can_oom_reap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		wake_oom_reaper(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	mmdrop(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	put_task_struct(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) #undef K
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991)  * Kill the provided task unless it is protected by an oom_score_adj
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992)  * of OOM_SCORE_ADJ_MIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) static int oom_kill_memcg_member(struct task_struct *task, void *message)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	    !is_global_init(task)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 		get_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 		__oom_kill_process(task, message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) static void oom_kill_process(struct oom_control *oc, const char *message)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	struct task_struct *victim = oc->chosen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	struct mem_cgroup *oom_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 					      DEFAULT_RATELIMIT_BURST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	 * If the task is already exiting, don't alarm the sysadmin or kill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	 * its children or threads; just give it access to memory reserves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	 * so it can die quickly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	task_lock(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	if (task_will_free_mem(victim)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		mark_oom_victim(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		wake_oom_reaper(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 		task_unlock(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		put_task_struct(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	task_unlock(victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	if (__ratelimit(&oom_rs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 		dump_header(oc, victim);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	 * Do we need to kill the entire memory cgroup?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	 * Or even one of the ancestor memory cgroups?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	 * Check this out before killing the victim task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	__oom_kill_process(victim, message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	 * If necessary, kill all tasks in the selected memory cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	if (oom_group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		mem_cgroup_print_oom_group(oom_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 				      (void*)message);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		mem_cgroup_put(oom_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) }
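
/*
 * A minimal sketch (not part of this file) of the userspace side of the
 * group-kill path above: writing 1 to memory.oom.group on a cgroup-v2 memory
 * cgroup is what makes mem_cgroup_get_oom_group() return a non-NULL group,
 * so one OOM kill takes down every task in the cgroup.  The cgroup directory
 * passed in is a placeholder.
 */
#include <stdio.h>

static int example_enable_group_oom(const char *cgroup_dir)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "%s/memory.oom.group", cgroup_dir);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs("1\n", f);
	return fclose(f);
}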
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)  * Determines whether the kernel must panic because of the panic_on_oom sysctl.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) static void check_panic_on_oom(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	if (likely(!sysctl_panic_on_oom))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	if (sysctl_panic_on_oom != 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 		 * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		 * does not panic for cpuset, mempolicy, or memcg allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 		 * failures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 		if (oc->constraint != CONSTRAINT_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	/* Do not panic for oom kills triggered by sysrq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	if (is_sysrq_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	dump_header(oc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	panic("Out of memory: %s panic_on_oom is enabled\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
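
/*
 * A minimal sketch (not part of this file) of selecting the panic_on_oom
 * behaviour checked above from userspace: 0 kills a task, 1 panics only for
 * unconstrained (CONSTRAINT_NONE) OOMs, 2 panics unconditionally, and
 * sysrq-triggered kills never panic.
 */
#include <stdio.h>

static int example_set_panic_on_oom(int mode)	/* 0, 1 or 2 */
{
	FILE *f = fopen("/proc/sys/vm/panic_on_oom", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", mode);
	return fclose(f);
}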
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) int register_oom_notifier(struct notifier_block *nb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	return blocking_notifier_chain_register(&oom_notify_list, nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) EXPORT_SYMBOL_GPL(register_oom_notifier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) int unregister_oom_notifier(struct notifier_block *nb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) EXPORT_SYMBOL_GPL(unregister_oom_notifier);
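
/*
 * A minimal sketch (not part of this file) of registering an OOM notifier
 * from a module.  The chain is invoked from out_of_memory() for global (non
 * memcg) OOMs before a victim is picked; pages released by the callback are
 * added to *freed, and a non-zero total makes the OOM killer back off for
 * this attempt.  example_shrink_cache() is a hypothetical placeholder.
 */
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/oom.h>

static int example_oom_notify(struct notifier_block *nb,
			      unsigned long unused, void *arg)
{
	unsigned long *freed = arg;

	*freed += 0;	/* e.g. *freed += example_shrink_cache(); */
	return NOTIFY_OK;
}

static struct notifier_block example_oom_nb = {
	.notifier_call = example_oom_notify,
};

static int __init example_oom_nb_init(void)
{
	return register_oom_notifier(&example_oom_nb);
}

static void __exit example_oom_nb_exit(void)
{
	unregister_oom_notifier(&example_oom_nb);
}

module_init(example_oom_nb_init);
module_exit(example_oom_nb_exit);
MODULE_LICENSE("GPL");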
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)  * out_of_memory - kill the "best" process when we run out of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)  * @oc: pointer to struct oom_control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)  * If we run out of memory, we have the choice between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)  * killing a random task (bad), letting the system crash (worse),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)  * or trying to be smart about which process to kill. Note that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)  * don't have to be perfect here; we just have to be good.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) bool out_of_memory(struct oom_control *oc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	unsigned long freed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	if (oom_killer_disabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	if (!is_memcg_oom(oc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 		if (freed > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 			/* Got some memory back in the last second. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	 * If current has a pending SIGKILL or is exiting, then automatically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	 * select it.  The goal is to allow it to allocate so that it may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	 * quickly exit and free its memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	if (task_will_free_mem(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 		mark_oom_victim(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 		wake_oom_reaper(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	 * The OOM killer does not compensate for IO-less reclaim.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	 * pagefault_out_of_memory() lost its gfp context, so we have to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	 * sure to exclude the zero mask - all other users should have at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	 * ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	 * invoke the OOM killer even if it is a GFP_NOFS allocation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	 * Check if there were limitations on the allocation (only relevant for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	 * NUMA and memcg) that may require different handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	oc->constraint = constrained_alloc(oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 		oc->nodemask = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	check_panic_on_oom(oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	    current->mm && !oom_unkillable_task(current) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	    oom_cpuset_eligible(current, oc) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 		get_task_struct(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		oc->chosen = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	select_bad_process(oc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	/* Found nothing?!?! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	if (!oc->chosen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		int ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 		trace_android_vh_oom_check_panic(oc, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		dump_header(oc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 		pr_warn("Out of memory and no killable processes...\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		 * If we got here due to an actual allocation at the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 		 * system level, we cannot survive this and will enter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 		 * an endless loop in the allocator. Bail out now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 			panic("System is deadlocked on memory\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	if (oc->chosen && oc->chosen != (void *)-1UL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 				 "Memory cgroup out of memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	return !!oc->chosen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)  * The pagefault handler calls here because some allocation has failed. We have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)  * to take care of the memcg OOM here because this is the only safe context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)  * without any locks held, but let the OOM killer triggered from the allocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)  * context take care of the global OOM.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) void pagefault_out_of_memory(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 				      DEFAULT_RATELIMIT_BURST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	if (mem_cgroup_oom_synchronize(true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	if (fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	if (__ratelimit(&pfoom_rs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	struct mm_struct *mm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	unsigned int f_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	bool reap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	struct pid *pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	if (flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	pid = pidfd_get_pid(pidfd, &f_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	if (IS_ERR(pid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 		return PTR_ERR(pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	task = get_pid_task(pid, PIDTYPE_TGID);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	if (!task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 		ret = -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		goto put_pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	 * Make sure to choose a thread which still has a reference to the mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	 * during the group exit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	p = find_lock_task_mm(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	if (!p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		ret = -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		goto put_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	mm = p->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	mmgrab(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	 * If we are too late and exit_mmap() has already checked mm_is_oom_victim(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	 * we will block on mmap_read_lock() until exit_mmap() releases mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	set_bit(MMF_OOM_VICTIM, &mm->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	if (task_will_free_mem(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 		reap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 		/* Error only if the work has not been done already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 		if (!test_bit(MMF_OOM_SKIP, &mm->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 			ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	task_unlock(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	if (!reap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 		goto drop_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	if (mmap_read_lock_killable(mm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 		goto drop_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	 * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	 * a possible change in exit_mmap() is seen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) drop_mm:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	mmdrop(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) put_task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	put_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) put_pid:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	put_pid(pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	return -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) #endif /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) }
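
/*
 * A minimal sketch (not part of this file) of a userspace caller of
 * process_mrelease().  The target must already be dying (here it is
 * SIGKILLed through the same pidfd first), otherwise the syscall fails with
 * EINVAL.  Assumes the pidfd syscall numbers are available via
 * <sys/syscall.h>; the 448 fallback is the asm-generic number for
 * process_mrelease - verify it for your architecture.
 */
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_process_mrelease
#define __NR_process_mrelease 448
#endif

static int example_kill_and_reap(int pidfd)
{
	long ret;

	/* SIGKILL first: process_mrelease() only reaps an exiting process. */
	if (syscall(__NR_pidfd_send_signal, pidfd, SIGKILL, NULL, 0) < 0) {
		perror("pidfd_send_signal");
		return -1;
	}
	ret = syscall(__NR_process_mrelease, pidfd, 0);
	if (ret < 0)
		perror("process_mrelease");
	return (int)ret;
}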
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) void add_to_oom_reaper(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	p = find_lock_task_mm(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	if (!p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	get_task_struct(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	if (task_will_free_mem(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		__mark_oom_victim(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		wake_oom_reaper(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	task_unlock(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	put_task_struct(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) }