Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * linux/kernel/seccomp.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * Copyright 2004-2005  Andrea Arcangeli <andrea@cpushare.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  * Copyright (C) 2012 Google, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  * Will Drewry <wad@chromium.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  * This defines a simple but solid secure-computing facility.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  * Mode 1 uses a fixed list of allowed system calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  * Mode 2 allows user-defined system call filters in the form
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  *        of Berkeley Packet Filters/Linux Socket Filters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #define pr_fmt(fmt) "seccomp: " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <linux/refcount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include <linux/audit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <linux/compat.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/coredump.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <linux/kmemleak.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/nospec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <linux/prctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <linux/sched/task_stack.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) #include <linux/seccomp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <linux/syscalls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <linux/sysctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include <asm/syscall.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) #ifdef CONFIG_SECCOMP_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include <linux/filter.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) #include <linux/pid.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #include <linux/ptrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #include <linux/capability.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) #include <linux/tracehook.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #include <linux/uaccess.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) #include <linux/anon_inodes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) #include <linux/lockdep.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49)  * wrong direction flag in the ioctl number. This is the broken one,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50)  * which the kernel needs to keep supporting until all userspaces stop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51)  * using the wrong command number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) #define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR	SECCOMP_IOR(2, __u64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) 
/*
 * Lifecycle states of a single user-notification request; the full state
 * machine is described on struct seccomp_knotif.
 */
enum notify_state {
	SECCOMP_NOTIFY_INIT,	/* created, not yet read by the listener */
	SECCOMP_NOTIFY_SENT,	/* read off the listener FD, awaiting reply */
	SECCOMP_NOTIFY_REPLIED,	/* userspace handler has replied */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) 
struct seccomp_knotif {
	/* The task_struct of the task whose filter triggered the notification */
	struct task_struct *task;

	/* The "cookie" for this request; this is unique for this filter. */
	u64 id;

	/*
	 * The seccomp data. This pointer is valid the entire time this
	 * notification is active, since it comes from __seccomp_filter which
	 * eclipses the entire lifecycle here.
	 */
	const struct seccomp_data *data;

	/*
	 * Notification states. When SECCOMP_RET_USER_NOTIF is returned, a
	 * struct seccomp_knotif is created and starts out in INIT. Once the
	 * handler reads the notification off of an FD, it transitions to SENT.
	 * If a signal is received the state transitions back to INIT and
	 * another message is sent. When the userspace handler replies, state
	 * transitions to REPLIED.
	 */
	enum notify_state state;

	/* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */
	int error;
	long val;
	u32 flags;

	/*
	 * Signals when this has changed states, such as the listener
	 * dying, a new seccomp addfd message, or changing to REPLIED
	 */
	struct completion ready;

	/* Entry in the owning struct notification's ->notifications list. */
	struct list_head list;

	/* outstanding addfd requests */
	struct list_head addfd;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 
/**
 * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
 *
 * @file: A reference to the file to install in the other task
 * @fd: The fd number to install it at. If the fd number is -1, it means the
 *      installing process should allocate the fd as normal.
 * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
 *         is allowed.
 * @ret: The return value of the installing process. It is set to the fd num
 *       upon success (>= 0).
 * @completion: Indicates that the installing process has completed fd
 *              installation, or gone away (either due to successful
 *              reply, or signal)
 * @list: Entry in the seccomp_knotif's outstanding ->addfd list
 *
 */
struct seccomp_kaddfd {
	struct file *file;
	int fd;
	unsigned int flags;

	/* To only be set on reply */
	int ret;
	struct completion completion;
	struct list_head list;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) 
/**
 * struct notification - container for seccomp userspace notifications. Since
 * most seccomp filters will not have notification listeners attached and this
 * structure is fairly large, we store the notification-specific stuff in a
 * separate structure.
 *
 * @request: A semaphore that users of this notification can wait on for
 *           changes. Actual reads and writes are still controlled with
 *           filter->notify_lock.
 * @next_id: The id of the next request.
 * @notifications: A list of struct seccomp_knotif elements.
 */
struct notification {
	struct semaphore request;
	u64 next_id;		/* becomes the "cookie" (knotif->id) of the next request */
	struct list_head notifications;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) #ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter cache of seccomp actions per
 * arch/syscall pair
 *
 * @allow_native: A bitmap where each bit represents whether the
 *		  filter will always allow the syscall, for the
 *		  native architecture.
 * @allow_compat: A bitmap where each bit represents whether the
 *		  filter will always allow the syscall, for the
 *		  compat architecture.
 */
struct action_cache {
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	/* Only present on architectures that also define a compat syscall table. */
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) struct action_cache { };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
/* Without an action cache, never short-circuit: always run the filters. */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 
/* Cache preparation is a no-op when there is no action cache. */
static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) #endif /* SECCOMP_ARCH_NATIVE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 
/**
 * struct seccomp_filter - container for seccomp BPF programs
 *
 * @refs: Reference count to manage the object lifetime.
 *	  A filter's reference count is incremented for each directly
 *	  attached task, once for the dependent filter, and if
 *	  requested for the user notifier. When @refs reaches zero,
 *	  the filter can be freed.
 * @users: A filter's @users count is incremented for each directly
 *         attached task (filter installation, fork(), thread_sync),
 *	   and once for the dependent filter (tracked in filter->prev).
 *	   When it reaches zero it indicates that no direct or indirect
 *	   users of that filter exist. No new tasks can get associated with
 *	   this filter after reaching 0. The @users count is always smaller
 *	   or equal to @refs. Hence, reaching 0 for @users does not mean
 *	   the filter can be freed.
 * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
 * @cache: cache of arch/syscall mappings to actions
 * @prev: points to a previously installed, or inherited, filter
 * @prog: the BPF program to evaluate
 * @notif: the struct that holds all notification related information
 * @notify_lock: A lock for all notification-related accesses.
 * @wqh: A wait queue for poll if a notifier is in use.
 *
 * seccomp_filter objects are organized in a tree linked via the @prev
 * pointer.  For any task, it appears to be a singly-linked list starting
 * with current->seccomp.filter, the most recently attached or inherited filter.
 * However, multiple filters may share a @prev node, by way of fork(), which
 * results in a unidirectional tree existing in memory.  This is similar to
 * how namespaces work.
 *
 * seccomp_filter objects should never be modified after being attached
 * to a task_struct (other than @refs).
 */
struct seccomp_filter {
	refcount_t refs;
	refcount_t users;
	bool log;
	struct action_cache cache;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
	struct notification *notif;
	struct mutex notify_lock;
	wait_queue_head_t wqh;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) /* Limit any path through the tree to 256KB worth of instructions. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228)  * Endianness is explicitly ignored and left for BPF program authors to manage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229)  * as per the specific architecture.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) static void populate_seccomp_data(struct seccomp_data *sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	 * Instead of using current_pt_reg(), we're already doing the work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	 * to safely fetch "current", so just use "task" everywhere below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 	struct task_struct *task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	struct pt_regs *regs = task_pt_regs(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	unsigned long args[6];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	sd->nr = syscall_get_nr(task, regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 	sd->arch = syscall_get_arch(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	syscall_get_arguments(task, regs, args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	sd->args[0] = args[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	sd->args[1] = args[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	sd->args[2] = args[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	sd->args[3] = args[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	sd->args[4] = args[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 	sd->args[5] = args[5];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 	sd->instruction_pointer = KSTK_EIP(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 
/**
 *	seccomp_check_filter - verify seccomp filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Takes a previously checked filter (by bpf_check_classic) and
 * redirects all filter code that loads struct sk_buff data
 * and related data through seccomp_bpf_load.  It also
 * enforces length and alignment checking of those loads.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			/* Rewrite absolute loads to index struct seccomp_data. */
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and not out of bounds. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			/* The "packet length" is always sizeof(struct seccomp_data). */
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			/* Any opcode not explicitly allowed above is rejected. */
			return -EINVAL;
		}
	}
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) #ifdef SECCOMP_ARCH_NATIVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 						    size_t bitmap_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 						    int syscall_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 	syscall_nr = array_index_nospec(syscall_nr, bitmap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 	return test_bit(syscall_nr, bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 
/**
 * seccomp_cache_check_allow - lookup seccomp cache
 * @sfilter: The seccomp filter
 * @sd: The seccomp data to lookup the cache with
 *
 * Returns true if the seccomp_data is cached and allowed.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	int syscall_nr = sd->nr;
	const struct action_cache *cache = &sfilter->cache;

#ifndef SECCOMP_ARCH_COMPAT
	/* A native-only architecture doesn't need to check sd->arch. */
	return seccomp_cache_check_allow_bitmap(cache->allow_native,
						SECCOMP_ARCH_NATIVE_NR,
						syscall_nr);
#else
	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
		return seccomp_cache_check_allow_bitmap(cache->allow_native,
							SECCOMP_ARCH_NATIVE_NR,
							syscall_nr);
	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
							SECCOMP_ARCH_COMPAT_NR,
							syscall_nr);
#endif /* SECCOMP_ARCH_COMPAT */

	/* Reached only if sd->arch is neither native nor compat: a bug. */
	WARN_ON_ONCE(true);
	return false;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) #endif /* SECCOMP_ARCH_NATIVE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 
/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: optional seccomp data to be passed to filters
 * @match: stores struct seccomp_filter that resulted in the return value,
 *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
 *         be unchanged.
 *
 * Returns valid seccomp BPF response codes.
 */
#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{
	u32 ret = SECCOMP_RET_ALLOW;
	/* Make sure cross-thread synced filter points somewhere sane. */
	struct seccomp_filter *f =
			READ_ONCE(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (WARN_ON(f == NULL))
		return SECCOMP_RET_KILL_PROCESS;

	/* A cached allow verdict skips running the BPF programs entirely. */
	if (seccomp_cache_check_allow(f, sd))
		return SECCOMP_RET_ALLOW;

	/*
	 * All filters in the list are evaluated and the lowest BPF return
	 * value always takes priority (ignoring the DATA).
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

		/* ACTION_ONLY() masks off SECCOMP_RET_DATA before comparing. */
		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
			ret = cur_ret;
			*match = f;
		}
	}
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) #endif /* CONFIG_SECCOMP_FILTER */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	assert_spin_locked(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) static inline void seccomp_assign_mode(struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 				       unsigned long seccomp_mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 				       unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	assert_spin_locked(&task->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	task->seccomp.mode = seccomp_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 	 * Make sure TIF_SECCOMP cannot be set before the mode (and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	 * filter) is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 	smp_mb__before_atomic();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	/* Assume default seccomp processes want spec flaw mitigation. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 		arch_seccomp_spec_mitigate(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	set_tsk_thread_flag(task, TIF_SECCOMP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) #ifdef CONFIG_SECCOMP_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) /* Returns 1 if the parent is an ancestor of the child. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) static int is_ancestor(struct seccomp_filter *parent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 		       struct seccomp_filter *child)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	/* NULL is the root ancestor. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	if (parent == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 	for (; child; child = child->prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 		if (child == parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467)  * seccomp_can_sync_threads: checks if all threads can be synchronized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469)  * Expects sighand and cred_guard_mutex locks to be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471)  * Returns 0 on success, -ve on error, or the pid of a thread which was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472)  * either not in the correct seccomp mode or did not have an ancestral
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473)  * seccomp filter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) static inline pid_t seccomp_can_sync_threads(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	struct task_struct *thread, *caller;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	assert_spin_locked(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	/* Validate all threads being eligible for synchronization. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	caller = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 	for_each_thread(caller, thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 		pid_t failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 		/* Skip current, since it is initiating the sync. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 		if (thread == caller)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 		     is_ancestor(thread->seccomp.filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 				 caller->seccomp.filter)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 		/* Return the first thread that cannot be synchronized. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 		failed = task_pid_vnr(thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 		/* If the pid cannot be resolved, then return -ESRCH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 		if (WARN_ON(failed == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 			failed = -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 		return failed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) static inline void seccomp_filter_free(struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 	if (filter) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 		bpf_prog_destroy(filter->prog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 		kfree(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) static void __seccomp_filter_orphan(struct seccomp_filter *orig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	while (orig && refcount_dec_and_test(&orig->users)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 		if (waitqueue_active(&orig->wqh))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 			wake_up_poll(&orig->wqh, EPOLLHUP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 		orig = orig->prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) static void __put_seccomp_filter(struct seccomp_filter *orig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	/* Clean up single-reference branches iteratively. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 	while (orig && refcount_dec_and_test(&orig->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 		struct seccomp_filter *freeme = orig;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 		orig = orig->prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 		seccomp_filter_free(freeme);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) static void __seccomp_filter_release(struct seccomp_filter *orig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 	/* Notify about any unused filters in the task's former filter tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	__seccomp_filter_orphan(orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	/* Finally drop all references to the task's former tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	__put_seccomp_filter(orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544)  * seccomp_filter_release - Detach the task from its filter tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545)  *			    drop its reference count, and notify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546)  *			    about unused filters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548)  * This function should only be called when the task is exiting as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549)  * it detaches it from its filter tree. As such, READ_ONCE() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550)  * barriers are not needed here, as would normally be needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) void seccomp_filter_release(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 	struct seccomp_filter *orig = tsk->seccomp.filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 	/* Detach task from its filter tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 	tsk->seccomp.filter = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	__seccomp_filter_release(orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562)  * seccomp_sync_threads: sets all threads to use current's filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564)  * Expects sighand and cred_guard_mutex locks to be held, and for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565)  * seccomp_can_sync_threads() to have returned success already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566)  * without dropping the locks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) static inline void seccomp_sync_threads(unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 	struct task_struct *thread, *caller;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 	assert_spin_locked(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 	/* Synchronize all threads. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 	caller = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 	for_each_thread(caller, thread) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 		/* Skip current, since it needs no changes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 		if (thread == caller)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 		/* Get a task reference for the new leaf node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 		get_seccomp_filter(caller);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 		 * Drop the task reference to the shared ancestor since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 		 * current's path will hold a reference.  (This also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 		 * allows a put before the assignment.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 		__seccomp_filter_release(thread->seccomp.filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 		/* Make our new filter tree visible. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 		smp_store_release(&thread->seccomp.filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 				  caller->seccomp.filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 		atomic_set(&thread->seccomp.filter_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 			   atomic_read(&caller->seccomp.filter_count));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 		 * Don't let an unprivileged task work around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 		 * the no_new_privs restriction by creating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		 * a thread that sets it up, enters seccomp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 		 * then dies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 		if (task_no_new_privs(caller))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 			task_set_no_new_privs(thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 		 * Opt the other thread into seccomp if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 		 * As threads are considered to be trust-realm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 		 * equivalent (see ptrace_may_access), it is safe to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 		 * allow one thread to transition the other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 					    flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621)  * seccomp_prepare_filter: Prepares a seccomp filter for use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622)  * @fprog: BPF program to install
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624)  * Returns filter on success or an ERR_PTR on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	struct seccomp_filter *sfilter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	const bool save_orig =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) #if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 		true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 		false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	 * Installing a seccomp filter requires that the task has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	 * This avoids scenarios where unprivileged tasks can affect the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	 * behavior of privileged children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	if (!task_no_new_privs(current) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 			!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 		return ERR_PTR(-EACCES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	/* Allocate a new seccomp_filter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	if (!sfilter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	mutex_init(&sfilter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 					seccomp_check_filter, save_orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 		kfree(sfilter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 		return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	refcount_set(&sfilter->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 	refcount_set(&sfilter->users, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	init_waitqueue_head(&sfilter->wqh);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	return sfilter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673)  * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674)  * @user_filter: pointer to the user data containing a sock_fprog.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676)  * Returns 0 on success and non-zero otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) static struct seccomp_filter *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) seccomp_prepare_user_filter(const char __user *user_filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 	struct sock_fprog fprog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	struct seccomp_filter *filter = ERR_PTR(-EFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	if (in_compat_syscall()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		struct compat_sock_fprog fprog32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 		fprog.len = fprog32.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 		fprog.filter = compat_ptr(fprog32.filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	} else /* falls through to the if below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	filter = seccomp_prepare_filter(&fprog);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	return filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) #ifdef SECCOMP_ARCH_NATIVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702)  * seccomp_is_const_allow - check if filter is constant allow with given data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703)  * @fprog: The BPF programs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704)  * @sd: The seccomp data to check against, only syscall number and arch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705)  *      number are considered constant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 				   struct seccomp_data *sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	unsigned int reg_value = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 	unsigned int pc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 	bool op_res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	if (WARN_ON_ONCE(!fprog))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	for (pc = 0; pc < fprog->len; pc++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 		struct sock_filter *insn = &fprog->filter[pc];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 		u16 code = insn->code;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 		u32 k = insn->k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 		switch (code) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 		case BPF_LD | BPF_W | BPF_ABS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 			switch (k) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 			case offsetof(struct seccomp_data, nr):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 				reg_value = sd->nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 			case offsetof(struct seccomp_data, arch):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 				reg_value = sd->arch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 			default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 				/* can't optimize (non-constant value load) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 				return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 		case BPF_RET | BPF_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 			/* reached return with constant values only, check allow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 			return k == SECCOMP_RET_ALLOW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 		case BPF_JMP | BPF_JA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 			pc += insn->k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 		case BPF_JMP | BPF_JEQ | BPF_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 		case BPF_JMP | BPF_JGE | BPF_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 		case BPF_JMP | BPF_JGT | BPF_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 		case BPF_JMP | BPF_JSET | BPF_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 			switch (BPF_OP(code)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 			case BPF_JEQ:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 				op_res = reg_value == k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 			case BPF_JGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 				op_res = reg_value >= k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 			case BPF_JGT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 				op_res = reg_value > k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 			case BPF_JSET:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 				op_res = !!(reg_value & k);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 			default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 				/* can't optimize (unknown jump) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 				return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 			pc += op_res ? insn->jt : insn->jf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		case BPF_ALU | BPF_AND | BPF_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 			reg_value &= k;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 			/* can't optimize (unknown insn) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	/* ran off the end of the filter?! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 					 void *bitmap, const void *bitmap_prev,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 					 size_t bitmap_size, int arch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	struct seccomp_data sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	if (bitmap_prev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 		/* The new filter must be as restrictive as the last. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 		/* Before any filters, all syscalls are always allowed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		bitmap_fill(bitmap, bitmap_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	for (nr = 0; nr < bitmap_size; nr++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 		/* No bitmap change: not a cacheable action. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 		if (!test_bit(nr, bitmap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 		sd.nr = nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 		sd.arch = arch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 		/* No bitmap change: continue to always allow. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		if (seccomp_is_const_allow(fprog, &sd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		 * Not a cacheable action: always run filters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		 * atomic clear_bit() not needed, filter not visible yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 		__clear_bit(nr, bitmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817)  * seccomp_cache_prepare - emulate the filter to find cachable syscalls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818)  * @sfilter: The seccomp filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820)  * Returns 0 if successful or -errno if error occurred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	struct action_cache *cache = &sfilter->cache;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	const struct action_cache *cache_prev =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 		sfilter->prev ? &sfilter->prev->cache : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 				     cache_prev ? cache_prev->allow_native : NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 				     SECCOMP_ARCH_NATIVE_NR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 				     SECCOMP_ARCH_NATIVE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) #ifdef SECCOMP_ARCH_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 				     cache_prev ? cache_prev->allow_compat : NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 				     SECCOMP_ARCH_COMPAT_NR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 				     SECCOMP_ARCH_COMPAT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) #endif /* SECCOMP_ARCH_COMPAT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) #endif /* SECCOMP_ARCH_NATIVE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843)  * seccomp_attach_filter: validate and attach filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  * @flags:  flags to change filter behavior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  * @filter: seccomp filter to add to the current process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847)  * Caller must be holding current->sighand->siglock lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849)  * Returns 0 on success, -ve on error, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850)  *   - in TSYNC mode: the pid of a thread which was either not in the correct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851)  *     seccomp mode or did not have an ancestral seccomp filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852)  *   - in NEW_LISTENER mode: the fd of the new listener
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) static long seccomp_attach_filter(unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 				  struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	unsigned long total_insns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	struct seccomp_filter *walker;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	assert_spin_locked(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	/* Validate resulting filter length. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	total_insns = filter->prog->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	for (walker = current->seccomp.filter; walker; walker = walker->prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	if (total_insns > MAX_INSNS_PER_PATH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	/* If thread sync has been requested, check that it is possible. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		ret = seccomp_can_sync_threads();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 			if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 				return -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 				return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	/* Set log flag, if present. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 	if (flags & SECCOMP_FILTER_FLAG_LOG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 		filter->log = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	 * If there is an existing filter, make it the prev and don't drop its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	 * task reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	filter->prev = current->seccomp.filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	seccomp_cache_prepare(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	current->seccomp.filter = filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	atomic_inc(&current->seccomp.filter_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	/* Now that the new filter is in place, synchronize to all threads. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		seccomp_sync_threads(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) static void __get_seccomp_filter(struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	refcount_inc(&filter->refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) /* get_seccomp_filter - increments the reference count of the filter on @tsk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) void get_seccomp_filter(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	struct seccomp_filter *orig = tsk->seccomp.filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	if (!orig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	__get_seccomp_filter(orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	refcount_inc(&orig->users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	clear_siginfo(info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	info->si_signo = SIGSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	info->si_code = SYS_SECCOMP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	info->si_call_addr = (void __user *)KSTK_EIP(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	info->si_errno = reason;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	info->si_arch = syscall_get_arch(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	info->si_syscall = syscall;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929)  * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930)  * @syscall: syscall number to send to userland
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931)  * @reason: filter-supplied reason code to send to userland (via si_errno)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933)  * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) static void seccomp_send_sigsys(int syscall, int reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	struct kernel_siginfo info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	seccomp_init_siginfo(&info, syscall, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	force_sig_info(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) #endif	/* CONFIG_SECCOMP_FILTER */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) /* For use with seccomp_actions_logged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) #define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) #define SECCOMP_LOG_KILL_THREAD		(1 << 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) #define SECCOMP_LOG_TRAP		(1 << 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) #define SECCOMP_LOG_ERRNO		(1 << 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) #define SECCOMP_LOG_TRACE		(1 << 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) #define SECCOMP_LOG_LOG			(1 << 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) #define SECCOMP_LOG_ALLOW		(1 << 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) #define SECCOMP_LOG_USER_NOTIF		(1 << 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 				    SECCOMP_LOG_KILL_THREAD  |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 				    SECCOMP_LOG_TRAP  |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 				    SECCOMP_LOG_ERRNO |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 				    SECCOMP_LOG_USER_NOTIF |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 				    SECCOMP_LOG_TRACE |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 				    SECCOMP_LOG_LOG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 			       bool requested)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	bool log = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	switch (action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	case SECCOMP_RET_ALLOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 	case SECCOMP_RET_TRAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	case SECCOMP_RET_ERRNO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	case SECCOMP_RET_TRACE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	case SECCOMP_RET_USER_NOTIF:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 		log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	case SECCOMP_RET_LOG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	case SECCOMP_RET_KILL_THREAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	case SECCOMP_RET_KILL_PROCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	 * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	 * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 	 * any action from being logged by removing the action name from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	 * seccomp_actions_logged sysctl.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	if (!log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	audit_seccomp(syscall, signr, action);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)  * Secure computing mode 1 allows only read/write/exit/sigreturn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)  * To be fully secure this must be combined with rlimit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)  * to limit the stack allocations too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) static const int mode1_syscalls[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	-1, /* negative terminated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) static void __secure_computing_strict(int this_syscall)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	const int *allowed_syscalls = mode1_syscalls;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) #ifdef CONFIG_COMPAT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	if (in_compat_syscall())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		allowed_syscalls = get_compat_mode1_syscalls();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		if (*allowed_syscalls == this_syscall)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	} while (*++allowed_syscalls != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) #ifdef SECCOMP_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	dump_stack();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	do_exit(SIGKILL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) #ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) void secure_computing_strict(int this_syscall)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	int mode = current->seccomp.mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	if (mode == SECCOMP_MODE_DISABLED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	else if (mode == SECCOMP_MODE_STRICT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		__secure_computing_strict(this_syscall);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) #ifdef CONFIG_SECCOMP_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	 * Note: overflow is ok here, the id just needs to be unique per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	 * filter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	lockdep_assert_held(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	return filter->notif->next_id++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	 * Remove the notification, and reset the list pointers, indicating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	 * that it has been handled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	list_del_init(&addfd->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	addfd->ret = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	complete(&addfd->completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) static int seccomp_do_user_notification(int this_syscall,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 					struct seccomp_filter *match,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 					const struct seccomp_data *sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	u32 flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 	struct seccomp_knotif n = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	struct seccomp_kaddfd *addfd, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	mutex_lock(&match->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	err = -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	if (!match->notif)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	n.task = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	n.state = SECCOMP_NOTIFY_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	n.data = sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	n.id = seccomp_next_notify_id(match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	init_completion(&n.ready);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	list_add(&n.list, &match->notif->notifications);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	INIT_LIST_HEAD(&n.addfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	up(&match->notif->request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	 * This is where we wait for a reply from userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 		mutex_unlock(&match->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 		err = wait_for_completion_interruptible(&n.ready);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 		mutex_lock(&match->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 		if (err != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 			goto interrupted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 		addfd = list_first_entry_or_null(&n.addfd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 						 struct seccomp_kaddfd, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 		/* Check if we were woken up by a addfd message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 		if (addfd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 			seccomp_handle_addfd(addfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	}  while (n.state != SECCOMP_NOTIFY_REPLIED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	ret = n.val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	err = n.error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	flags = n.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) interrupted:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	/* If there were any pending addfd calls, clear them out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 		/* The process went away before we got a chance to handle it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 		addfd->ret = -ESRCH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 		list_del_init(&addfd->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 		complete(&addfd->completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	 * Note that it's possible the listener died in between the time when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	 * we were notified of a response (or a signal) and when we were able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	 * re-acquire the lock, so only delete from the list if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	 * notification actually exists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	 * Also note that this test is only valid because there's no way to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	 * *reattach* to a notifier right now. If one is added, we'll need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	 * keep track of the notif itself and make sure they match here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	if (match->notif)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 		list_del(&n.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	mutex_unlock(&match->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	/* Userspace requests to continue the syscall. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	syscall_set_return_value(current, current_pt_regs(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 				 err, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 			    const bool recheck_after_trace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	u32 filter_ret, action;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	struct seccomp_filter *match = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	int data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	struct seccomp_data sd_local;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	 * Make sure that any changes to mode from another thread have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 	 * been seen after TIF_SECCOMP was seen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	if (!sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 		populate_seccomp_data(&sd_local);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		sd = &sd_local;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	filter_ret = seccomp_run_filters(sd, &match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	data = filter_ret & SECCOMP_RET_DATA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	action = filter_ret & SECCOMP_RET_ACTION_FULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	switch (action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	case SECCOMP_RET_ERRNO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		if (data > MAX_ERRNO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 			data = MAX_ERRNO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 		syscall_set_return_value(current, current_pt_regs(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 					 -data, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	case SECCOMP_RET_TRAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		/* Show the handler the original registers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		syscall_rollback(current, current_pt_regs());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		/* Let the filter pass back 16 bits of data. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		seccomp_send_sigsys(this_syscall, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	case SECCOMP_RET_TRACE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 		/* We've been put in this state by the ptracer already. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		if (recheck_after_trace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 		/* ENOSYS these calls if there is no tracer attached. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 			syscall_set_return_value(current,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 						 current_pt_regs(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 						 -ENOSYS, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 			goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		/* Allow the BPF to provide the event message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 		ptrace_event(PTRACE_EVENT_SECCOMP, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 		 * The delivery of a fatal signal during event
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 		 * notification may silently skip tracer notification,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 		 * which could leave us with a potentially unmodified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 		 * syscall that the tracer would have liked to have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		 * changed. Since the process is about to die, we just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 		 * force the syscall to be skipped and let the signal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 		 * kill the process and correctly handle any tracer exit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 		 * notifications.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 		if (fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 			goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		/* Check if the tracer forced the syscall to be skipped. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 		this_syscall = syscall_get_nr(current, current_pt_regs());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 		if (this_syscall < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 			goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		 * Recheck the syscall, since it may have changed. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		 * intentionally uses a NULL struct seccomp_data to force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 		 * a reload of all registers. This does not goto skip since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		 * a skip would have already been reported.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 		if (__seccomp_filter(this_syscall, NULL, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 			return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	case SECCOMP_RET_USER_NOTIF:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 		if (seccomp_do_user_notification(this_syscall, match, sd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 			goto skip;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	case SECCOMP_RET_LOG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 		seccomp_log(this_syscall, 0, action, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	case SECCOMP_RET_ALLOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 		 * Note that the "match" filter will always be NULL for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 		 * this action since SECCOMP_RET_ALLOW is the starting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 		 * state in seccomp_run_filters().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	case SECCOMP_RET_KILL_THREAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	case SECCOMP_RET_KILL_PROCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 		seccomp_log(this_syscall, SIGSYS, action, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		/* Dump core only if this is the last remaining thread. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		if (action != SECCOMP_RET_KILL_THREAD ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		    get_nr_threads(current) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 			kernel_siginfo_t info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 			/* Show the original registers in the dump. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 			syscall_rollback(current, current_pt_regs());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 			/* Trigger a manual coredump since do_exit skips it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 			seccomp_init_siginfo(&info, this_syscall, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 			do_coredump(&info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		if (action == SECCOMP_RET_KILL_THREAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 			do_exit(SIGSYS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 			do_group_exit(SIGSYS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	unreachable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) skip:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	seccomp_log(this_syscall, 0, action, match ? match->log : false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 			    const bool recheck_after_trace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) int __secure_computing(const struct seccomp_data *sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	int mode = current->seccomp.mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	int this_syscall;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	this_syscall = sd ? sd->nr :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 		syscall_get_nr(current, current_pt_regs());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	switch (mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	case SECCOMP_MODE_STRICT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		__secure_computing_strict(this_syscall);  /* may call do_exit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	case SECCOMP_MODE_FILTER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 		return __seccomp_filter(this_syscall, sd, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) long prctl_get_seccomp(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	return current->seccomp.mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)  * seccomp_set_mode_strict: internal function for setting strict seccomp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)  * Once current->seccomp.mode is non-zero, it may not be changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)  * Returns 0 on success or -EINVAL on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) static long seccomp_set_mode_strict(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	long ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	spin_lock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	if (!seccomp_may_assign_mode(seccomp_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) #ifdef TIF_NOTSC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	disable_TSC();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	seccomp_assign_mode(current, seccomp_mode, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	spin_unlock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) #ifdef CONFIG_SECCOMP_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) static void seccomp_notify_free(struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	kfree(filter->notif);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	filter->notif = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) static void seccomp_notify_detach(struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	struct seccomp_knotif *knotif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	if (!filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	mutex_lock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	 * If this file is being closed because e.g. the task who owned it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	 * died, let's wake everyone up who was waiting on us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	list_for_each_entry(knotif, &filter->notif->notifications, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		if (knotif->state == SECCOMP_NOTIFY_REPLIED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 		knotif->state = SECCOMP_NOTIFY_REPLIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 		knotif->error = -ENOSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 		knotif->val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		 * We do not need to wake up any pending addfd messages, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		 * the notifier will do that for us, as this just looks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 		 * like a standard reply.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 		complete(&knotif->ready);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	seccomp_notify_free(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static int seccomp_notify_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	struct seccomp_filter *filter = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	seccomp_notify_detach(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	__put_seccomp_filter(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) /* must be called with notif_lock held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) static inline struct seccomp_knotif *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) find_notification(struct seccomp_filter *filter, u64 id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	struct seccomp_knotif *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	lockdep_assert_held(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	list_for_each_entry(cur, &filter->notif->notifications, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 		if (cur->id == id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 			return cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) static long seccomp_notify_recv(struct seccomp_filter *filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 				void __user *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	struct seccomp_knotif *knotif = NULL, *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	struct seccomp_notif unotif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	/* Verify that we're not given garbage to keep struct extensible. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	ret = check_zeroed_user(buf, sizeof(unotif));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	memset(&unotif, 0, sizeof(unotif));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 	ret = down_interruptible(&filter->notif->request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	mutex_lock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	list_for_each_entry(cur, &filter->notif->notifications, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 		if (cur->state == SECCOMP_NOTIFY_INIT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 			knotif = cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	 * If we didn't find a notification, it could be that the task was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	 * interrupted by a fatal signal between the time we were woken and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 	 * when we were able to acquire the rw lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	if (!knotif) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	unotif.id = knotif->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 	unotif.pid = task_pid_vnr(knotif->task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	unotif.data = *(knotif->data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 	knotif->state = SECCOMP_NOTIFY_SENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 		ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 		 * Userspace screwed up. To make sure that we keep this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 		 * notification alive, let's reset it back to INIT. It
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 		 * may have died when we released the lock, so we need to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 		 * sure it's still around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 		mutex_lock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 		knotif = find_notification(filter, unotif.id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 		if (knotif) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 			knotif->state = SECCOMP_NOTIFY_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 			up(&filter->notif->request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 		mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) static long seccomp_notify_send(struct seccomp_filter *filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 				void __user *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 	struct seccomp_notif_resp resp = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	struct seccomp_knotif *knotif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	if (copy_from_user(&resp, buf, sizeof(resp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	    (resp.error || resp.val))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	ret = mutex_lock_interruptible(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	knotif = find_notification(filter, resp.id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	if (!knotif) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	/* Allow exactly one reply. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	if (knotif->state != SECCOMP_NOTIFY_SENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 		ret = -EINPROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	knotif->state = SECCOMP_NOTIFY_REPLIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	knotif->error = resp.error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	knotif->val = resp.val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	knotif->flags = resp.flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	complete(&knotif->ready);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) static long seccomp_notify_id_valid(struct seccomp_filter *filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 				    void __user *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	struct seccomp_knotif *knotif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	u64 id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 	long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 	if (copy_from_user(&id, buf, sizeof(id)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	ret = mutex_lock_interruptible(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 	knotif = find_notification(filter, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 	if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) static long seccomp_notify_addfd(struct seccomp_filter *filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 				 struct seccomp_notif_addfd __user *uaddfd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 				 unsigned int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	struct seccomp_notif_addfd addfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 	struct seccomp_knotif *knotif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	struct seccomp_kaddfd kaddfd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 	if (addfd.newfd_flags & ~O_CLOEXEC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	kaddfd.file = fget(addfd.srcfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	if (!kaddfd.file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	kaddfd.flags = addfd.newfd_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	kaddfd.fd = (addfd.flags & SECCOMP_ADDFD_FLAG_SETFD) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 		    addfd.newfd : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	init_completion(&kaddfd.completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	ret = mutex_lock_interruptible(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	knotif = find_notification(filter, addfd.id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	if (!knotif) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	 * We do not want to allow for FD injection to occur before the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	 * notification has been picked up by a userspace handler, or after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	 * the notification has been replied to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	if (knotif->state != SECCOMP_NOTIFY_SENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		ret = -EINPROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	list_add(&kaddfd.list, &knotif->addfd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	complete(&knotif->ready);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	/* Now we wait for it to be processed or be interrupted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	ret = wait_for_completion_interruptible(&kaddfd.completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	if (ret == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 		 * We had a successful completion. The other side has already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 		 * removed us from the addfd queue, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 		 * wait_for_completion_interruptible has a memory barrier upon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 		 * success that lets us read this value directly without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 		 * locking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 		ret = kaddfd.ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	mutex_lock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 	 * Even though we were woken up by a signal and not a successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 	 * completion, a completion may have happened in the mean time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 	 * We need to check again if the addfd request has been handled,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	 * and if not, we will remove it from the queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	if (list_empty(&kaddfd.list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 		ret = kaddfd.ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 		list_del(&kaddfd.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	fput(kaddfd.file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 				 unsigned long arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 	struct seccomp_filter *filter = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	void __user *buf = (void __user *)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 	/* Fixed-size ioctls */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	switch (cmd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 	case SECCOMP_IOCTL_NOTIF_RECV:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 		return seccomp_notify_recv(filter, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	case SECCOMP_IOCTL_NOTIF_SEND:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 		return seccomp_notify_send(filter, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 	case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	case SECCOMP_IOCTL_NOTIF_ID_VALID:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		return seccomp_notify_id_valid(filter, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	/* Extensible Argument ioctls */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) #define EA_IOCTL(cmd)	((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 	switch (EA_IOCTL(cmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 	case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 		return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) static __poll_t seccomp_notify_poll(struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 				    struct poll_table_struct *poll_tab)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	struct seccomp_filter *filter = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	__poll_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 	struct seccomp_knotif *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 	poll_wait(file, &filter->wqh, poll_tab);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 	if (mutex_lock_interruptible(&filter->notify_lock) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 		return EPOLLERR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	list_for_each_entry(cur, &filter->notif->notifications, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		if (cur->state == SECCOMP_NOTIFY_INIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 			ret |= EPOLLIN | EPOLLRDNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 		if (cur->state == SECCOMP_NOTIFY_SENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 			ret |= EPOLLOUT | EPOLLWRNORM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 		if ((ret & EPOLLIN) && (ret & EPOLLOUT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	mutex_unlock(&filter->notify_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	if (refcount_read(&filter->users) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 		ret |= EPOLLHUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) static const struct file_operations seccomp_notify_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	.poll = seccomp_notify_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	.release = seccomp_notify_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	.unlocked_ioctl = seccomp_notify_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	.compat_ioctl = seccomp_notify_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) static struct file *init_listener(struct seccomp_filter *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	struct file *ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	ret = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	if (!filter->notif)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	sema_init(&filter->notif->request, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	filter->notif->next_id = get_random_u64();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	INIT_LIST_HEAD(&filter->notif->notifications);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 				 filter, O_RDWR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	if (IS_ERR(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		goto out_notif;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	/* The file has a reference to it now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	__get_seccomp_filter(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) out_notif:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	if (IS_ERR(ret))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		seccomp_notify_free(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)  * Does @new_child have a listener while an ancestor also has a listener?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741)  * If so, we'll want to reject this filter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742)  * This only has to be tested for the current process, even in the TSYNC case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743)  * because TSYNC installs @child with the same parent on all threads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744)  * Note that @new_child is not hooked up to its parent at this point yet, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)  * we use current->seccomp.filter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) static bool has_duplicate_listener(struct seccomp_filter *new_child)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 	struct seccomp_filter *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	/* must be protected against concurrent TSYNC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	lockdep_assert_held(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	if (!new_child->notif)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	for (cur = current->seccomp.filter; cur; cur = cur->prev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 		if (cur->notif)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)  * seccomp_set_mode_filter: internal function for setting seccomp filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)  * @flags:  flags to change filter behavior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767)  * @filter: struct sock_fprog containing filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)  * This function may be called repeatedly to install additional filters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)  * Every filter successfully installed will be evaluated (in reverse order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)  * for each system call the task makes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773)  * Once current->seccomp.mode is non-zero, it may not be changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)  * Returns 0 on success or -EINVAL on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) static long seccomp_set_mode_filter(unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 				    const char __user *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 	struct seccomp_filter *prepared = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	long ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 	int listener = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	struct file *listener_f = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	/* Validate flags. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 	 * In the successful case, NEW_LISTENER returns the new listener fd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 	 * But in the failure case, TSYNC returns the thread that died. If you
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 	 * combine these two flags, there's no way to tell whether something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 	 * succeeded or failed. So, let's disallow this combination if the user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 	 * has not explicitly requested no errors from TSYNC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 	if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	    (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	    ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 	/* Prepare the new filter before holding any locks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 	prepared = seccomp_prepare_user_filter(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 	if (IS_ERR(prepared))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 		return PTR_ERR(prepared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 		listener = get_unused_fd_flags(O_CLOEXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 		if (listener < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 			ret = listener;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 			goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 		listener_f = init_listener(prepared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 		if (IS_ERR(listener_f)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 			put_unused_fd(listener);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 			ret = PTR_ERR(listener_f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 			goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	 * Make sure we cannot change seccomp or nnp state via TSYNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	 * while another thread is in the middle of calling exec.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 	    mutex_lock_killable(&current->signal->cred_guard_mutex))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 		goto out_put_fd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 	spin_lock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 	if (!seccomp_may_assign_mode(seccomp_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	if (has_duplicate_listener(prepared)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 		ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 	ret = seccomp_attach_filter(flags, prepared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 	/* Do not free the successfully attached filter. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	prepared = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	seccomp_assign_mode(current, seccomp_mode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	spin_unlock_irq(&current->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 		mutex_unlock(&current->signal->cred_guard_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) out_put_fd:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 			listener_f->private_data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 			fput(listener_f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 			put_unused_fd(listener);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 			seccomp_notify_detach(prepared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 			fd_install(listener, listener_f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 			ret = listener;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	seccomp_filter_free(prepared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) static inline long seccomp_set_mode_filter(unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 					   const char __user *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) static long seccomp_get_action_avail(const char __user *uaction)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 	u32 action;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 	if (copy_from_user(&action, uaction, sizeof(action)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	switch (action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	case SECCOMP_RET_KILL_PROCESS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	case SECCOMP_RET_KILL_THREAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	case SECCOMP_RET_TRAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 	case SECCOMP_RET_ERRNO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	case SECCOMP_RET_USER_NOTIF:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	case SECCOMP_RET_TRACE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 	case SECCOMP_RET_LOG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	case SECCOMP_RET_ALLOW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 		return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) static long seccomp_get_notif_sizes(void __user *usizes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 	struct seccomp_notif_sizes sizes = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 		.seccomp_notif = sizeof(struct seccomp_notif),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 		.seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 		.seccomp_data = sizeof(struct seccomp_data),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	if (copy_to_user(usizes, &sizes, sizeof(sizes)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) /* Common entry point for both prctl and syscall. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) static long do_seccomp(unsigned int op, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 		       void __user *uargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 	switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 	case SECCOMP_SET_MODE_STRICT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 		if (flags != 0 || uargs != NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 		return seccomp_set_mode_strict();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 	case SECCOMP_SET_MODE_FILTER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 		return seccomp_set_mode_filter(flags, uargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	case SECCOMP_GET_ACTION_AVAIL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 		if (flags != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 		return seccomp_get_action_avail(uargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 	case SECCOMP_GET_NOTIF_SIZES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 		if (flags != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 		return seccomp_get_notif_sizes(uargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 			 void __user *, uargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	return do_seccomp(op, flags, uargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)  * prctl_set_seccomp: configures current->seccomp.mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)  * @seccomp_mode: requested mode to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948)  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)  * Returns 0 on success or -EINVAL on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	unsigned int op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	void __user *uargs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	switch (seccomp_mode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	case SECCOMP_MODE_STRICT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 		op = SECCOMP_SET_MODE_STRICT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 		 * Setting strict mode through prctl always ignored filter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 		 * so make sure it is always NULL here to pass the internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 		 * check in do_seccomp().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 		uargs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 	case SECCOMP_MODE_FILTER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 		op = SECCOMP_SET_MODE_FILTER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 		uargs = filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	/* prctl interface doesn't have flags, so they are always zero. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	return do_seccomp(op, 0, uargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) static struct seccomp_filter *get_nth_filter(struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 					     unsigned long filter_off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	struct seccomp_filter *orig, *filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	unsigned long count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 	 * Note: this is only correct because the caller should be the (ptrace)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	 * tracer of the task, otherwise lock_task_sighand is needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	spin_lock_irq(&task->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 	if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 		spin_unlock_irq(&task->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	orig = task->seccomp.filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 	__get_seccomp_filter(orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 	spin_unlock_irq(&task->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	for (filter = orig; filter; filter = filter->prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 		count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	if (filter_off >= count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 		filter = ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 	count -= filter_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 	for (filter = orig; filter && count > 1; filter = filter->prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 		count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	if (WARN_ON(count != 1 || !filter)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 		filter = ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 	__get_seccomp_filter(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 	__put_seccomp_filter(orig);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 	return filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 			void __user *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 	struct seccomp_filter *filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 	struct sock_fprog_kern *fprog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	if (!capable(CAP_SYS_ADMIN) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 		return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	filter = get_nth_filter(task, filter_off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	if (IS_ERR(filter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 		return PTR_ERR(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	fprog = filter->prog->orig_prog;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	if (!fprog) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 		/* This must be a new non-cBPF filter, since we save
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 		 * every cBPF filter's orig_prog above when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 		 * CONFIG_CHECKPOINT_RESTORE is enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 		ret = -EMEDIUMTYPE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	ret = fprog->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 	if (!data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 		ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	__put_seccomp_filter(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) long seccomp_get_metadata(struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 			  unsigned long size, void __user *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 	long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	struct seccomp_filter *filter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	struct seccomp_metadata kmd = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 	if (!capable(CAP_SYS_ADMIN) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 		return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	size = min_t(unsigned long, size, sizeof(kmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 	if (size < sizeof(kmd.filter_off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 		return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 	filter = get_nth_filter(task, kmd.filter_off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 	if (IS_ERR(filter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 		return PTR_ERR(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	if (filter->log)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 		kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	ret = size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	if (copy_to_user(data, &kmd, size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 		ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	__put_seccomp_filter(filter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) #ifdef CONFIG_SYSCTL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) /* Human readable action names for friendly sysctl interaction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) #define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) #define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) #define SECCOMP_RET_TRAP_NAME		"trap"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) #define SECCOMP_RET_ERRNO_NAME		"errno"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) #define SECCOMP_RET_USER_NOTIF_NAME	"user_notif"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) #define SECCOMP_RET_TRACE_NAME		"trace"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) #define SECCOMP_RET_LOG_NAME		"log"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) #define SECCOMP_RET_ALLOW_NAME		"allow"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) static const char seccomp_actions_avail[] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 				SECCOMP_RET_KILL_PROCESS_NAME	" "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 				SECCOMP_RET_KILL_THREAD_NAME	" "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 				SECCOMP_RET_TRAP_NAME		" "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 				SECCOMP_RET_ERRNO_NAME		" "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 				SECCOMP_RET_USER_NOTIF_NAME     " "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 				SECCOMP_RET_TRACE_NAME		" "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 				SECCOMP_RET_LOG_NAME		" "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 				SECCOMP_RET_ALLOW_NAME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) struct seccomp_log_name {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	u32		log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	const char	*name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) static const struct seccomp_log_name seccomp_log_names[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 	{ SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 	{ }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) static bool seccomp_names_from_actions_logged(char *names, size_t size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 					      u32 actions_logged,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 					      const char *sep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	const struct seccomp_log_name *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	bool append_sep = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	for (cur = seccomp_log_names; cur->name && size; cur++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 		ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 		if (!(actions_logged & cur->log))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 		if (append_sep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 			ret = strscpy(names, sep, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 			if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 				return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 			names += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 			size -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 			append_sep = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 		ret = strscpy(names, cur->name, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 		names += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 		size -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) static bool seccomp_action_logged_from_name(u32 *action_logged,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 					    const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	const struct seccomp_log_name *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	for (cur = seccomp_log_names; cur->name; cur++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 		if (!strcmp(cur->name, name)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 			*action_logged = cur->log;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 	char *name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 	*actions_logged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 	while ((name = strsep(&names, " ")) && *name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 		u32 action_logged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 		if (!seccomp_action_logged_from_name(&action_logged, name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 			return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 		*actions_logged |= action_logged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 			       size_t *lenp, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 	char names[sizeof(seccomp_actions_avail)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 	struct ctl_table table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 	memset(names, 0, sizeof(names));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 	if (!seccomp_names_from_actions_logged(names, sizeof(names),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 					       seccomp_actions_logged, " "))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 	table = *ro_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 	table.data = names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 	table.maxlen = sizeof(names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 	return proc_dostring(&table, 0, buffer, lenp, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 				size_t *lenp, loff_t *ppos, u32 *actions_logged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 	char names[sizeof(seccomp_actions_avail)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 	struct ctl_table table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 	if (!capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 		return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	memset(names, 0, sizeof(names));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	table = *ro_table;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	table.data = names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 	table.maxlen = sizeof(names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 	ret = proc_dostring(&table, 1, buffer, lenp, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 	if (!seccomp_actions_logged_from_names(actions_logged, table.data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 	if (*actions_logged & SECCOMP_LOG_ALLOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 	seccomp_actions_logged = *actions_logged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 				 int ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 	char names[sizeof(seccomp_actions_avail)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 	char old_names[sizeof(seccomp_actions_avail)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 	const char *new = names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	const char *old = old_names;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	if (!audit_enabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	memset(names, 0, sizeof(names));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	memset(old_names, 0, sizeof(old_names));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 		new = "?";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 	else if (!actions_logged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 		new = "(none)";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 	else if (!seccomp_names_from_actions_logged(names, sizeof(names),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 						    actions_logged, ","))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 		new = "?";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	if (!old_actions_logged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 		old = "(none)";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 	else if (!seccomp_names_from_actions_logged(old_names,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 						    sizeof(old_names),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 						    old_actions_logged, ","))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 		old = "?";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 	return audit_seccomp_actions_logged(new, old, !ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 					  void *buffer, size_t *lenp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 					  loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 	if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 		u32 actions_logged = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 		u32 old_actions_logged = seccomp_actions_logged;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 		ret = write_actions_logged(ro_table, buffer, lenp, ppos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 					   &actions_logged);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 		audit_actions_logged(actions_logged, old_actions_logged, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 		ret = read_actions_logged(ro_table, buffer, lenp, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) static struct ctl_path seccomp_sysctl_path[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	{ .procname = "kernel", },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	{ .procname = "seccomp", },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	{ }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) static struct ctl_table seccomp_sysctl_table[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 		.procname	= "actions_avail",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 		.data		= (void *) &seccomp_actions_avail,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 		.maxlen		= sizeof(seccomp_actions_avail),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 		.mode		= 0444,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 		.proc_handler	= proc_dostring,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 		.procname	= "actions_logged",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 		.mode		= 0644,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 		.proc_handler	= seccomp_actions_logged_handler,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 	{ }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) static int __init seccomp_sysctl_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 	struct ctl_table_header *hdr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 	if (!hdr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 		pr_warn("sysctl registration failed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 		kmemleak_not_leak(hdr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) device_initcall(seccomp_sysctl_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) #endif /* CONFIG_SYSCTL */