| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
|  * Simple CPU accounting cgroup controller |
|  */ |
| #include <linux/cpufreq_times.h> |
| #include "sched.h" |
| #include <trace/hooks/sched.h> |
| |
| #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
| /* |
|  * There are no locks covering percpu hardirq/softirq time. |
|  * They are only modified in vtime_account, on corresponding CPU |
|  * with interrupts disabled. So, writes are safe. |
|  * They are read and saved off onto struct rq in update_rq_clock(). |
|  * This may result in other CPU reading this CPU's irq time and can |
|  * race with irq/vtime_account on this CPU. We would either get old |
|  * or new value with a side effect of accounting a slice of irq time |
|  * to the wrong task when irq is in progress while we read rq->clock. |
|  * That is a worthy compromise in place of having locks on each irq |
|  * in account_system_time. |
|  */ |
| DEFINE_PER_CPU(struct irqtime, cpu_irqtime); |
| EXPORT_PER_CPU_SYMBOL_GPL(cpu_irqtime); |
| |
| static int sched_clock_irqtime; |
| |
| void enable_sched_clock_irqtime(void) |
| { |
| <------>sched_clock_irqtime = 1; |
| } |
| |
| void disable_sched_clock_irqtime(void) |
| { |
| <------>sched_clock_irqtime = 0; |
| } |
| |
| static void irqtime_account_delta(struct irqtime *irqtime, u64 delta, |
| <------><------><------><------> enum cpu_usage_stat idx) |
| { |
| <------>u64 *cpustat = kcpustat_this_cpu->cpustat; |
| |
| <------>u64_stats_update_begin(&irqtime->sync); |
| <------>cpustat[idx] += delta; |
| <------>irqtime->total += delta; |
| <------>irqtime->tick_delta += delta; |
| <------>u64_stats_update_end(&irqtime->sync); |
| } |
| |
| /* |
|  * Called before incrementing preempt_count on {soft,}irq_enter |
|  * and before decrementing preempt_count on {soft,}irq_exit. |
|  */ |
| void irqtime_account_irq(struct task_struct *curr) |
| { |
| <------>struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); |
| <------>s64 delta; |
| <------>int cpu; |
| |
| <------>if (!sched_clock_irqtime) |
| <------><------>return; |
| |
| <------>cpu = smp_processor_id(); |
| <------>delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; |
| <------>irqtime->irq_start_time += delta; |
| |
| <------>/* |
| <------> * We do not account for softirq time from ksoftirqd here. |
| <------> * We want to continue accounting softirq time to the ksoftirqd thread |
| <------> * in that case, so as not to confuse the scheduler with a special task |
| <------> * that does not consume any time, but still wants to run. |
| <------> */ |
| <------>if (hardirq_count()) |
| <------><------>irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); |
| <------>else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) |
| <------><------>irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); |
| |
| <------>trace_android_rvh_account_irq(curr, cpu, delta); |
| } |
| EXPORT_SYMBOL_GPL(irqtime_account_irq); |
| |
| static u64 irqtime_tick_accounted(u64 maxtime) |
| { |
| <------>struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime); |
| <------>u64 delta; |
| |
| <------>delta = min(irqtime->tick_delta, maxtime); |
| <------>irqtime->tick_delta -= delta; |
| |
| <------>return delta; |
| } |
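| |
| /* |
|  * Illustrative sketch, not part of the original file: the reader side of |
|  * the u64_stats seqcount updated above (mainline keeps it in sched.h as |
|  * irq_time_read(); the hypothetical name below avoids a clash). On 64-bit |
|  * kernels the begin/retry pair compiles away; on 32-bit it guards against |
|  * torn reads of the 64-bit total. |
|  */ |
| static inline u64 irqtime_read_total_sketch(int cpu) |
| { |
| <------>struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); |
| <------>unsigned int seq; |
| <------>u64 total; |
| |
| <------>do { |
| <------><------>seq = __u64_stats_fetch_begin(&irqtime->sync); |
| <------><------>total = irqtime->total; |
| <------>} while (__u64_stats_fetch_retry(&irqtime->sync, seq)); |
| |
| <------>return total; |
| } |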
| |
| #else |
| |
| #define sched_clock_irqtime (0) |
| |
| static u64 irqtime_tick_accounted(u64 dummy) |
| { |
| <------>return 0; |
| } |
| |
| #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| |
| static inline void task_group_account_field(struct task_struct *p, int index, |
| <------><------><------><------><------> u64 tmp) |
| { |
| <------>/* |
| <------> * Since all updates are sure to touch the root cgroup, we |
| <------> * get ourselves ahead and touch it first. If the root cgroup |
| <------> * is the only cgroup, then nothing else should be necessary. |
| <------> */ |
| <------>__this_cpu_add(kernel_cpustat.cpustat[index], tmp); |
| |
| <------>cgroup_account_cputime_field(p, index, tmp); |
| } |
| |
| /* |
|  * Account user CPU time to a process. |
|  * @p: the process that the CPU time gets accounted to |
|  * @cputime: the CPU time spent in user space since the last update |
|  */ |
| void account_user_time(struct task_struct *p, u64 cputime) |
| { |
| <------>int index; |
| |
| <------>/* Add user time to process. */ |
| <------>p->utime += cputime; |
| <------>account_group_user_time(p, cputime); |
| |
| <------>index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; |
| |
| <------>/* Add user time to cpustat. */ |
| <------>task_group_account_field(p, index, cputime); |
| |
| <------>/* Account for user time used */ |
| <------>acct_account_cputime(p); |
| |
| <------>/* Account power usage for user time */ |
| <------>cpufreq_acct_update_power(p, cputime); |
| } |
| |
| /* |
|  * Account guest CPU time to a process. |
|  * @p: the process that the CPU time gets accounted to |
|  * @cputime: the CPU time spent in virtual machine since the last update |
|  */ |
| void account_guest_time(struct task_struct *p, u64 cputime) |
| { |
| <------>u64 *cpustat = kcpustat_this_cpu->cpustat; |
| |
| <------>/* Add guest time to process. */ |
| <------>p->utime += cputime; |
| <------>account_group_user_time(p, cputime); |
| <------>p->gtime += cputime; |
| |
| <------>/* Add guest time to cpustat. */ |
| <------>if (task_nice(p) > 0) { |
| <------><------>task_group_account_field(p, CPUTIME_NICE, cputime); |
| <------><------>cpustat[CPUTIME_GUEST_NICE] += cputime; |
| <------>} else { |
| <------><------>task_group_account_field(p, CPUTIME_USER, cputime); |
| <------><------>cpustat[CPUTIME_GUEST] += cputime; |
| <------>} |
| } |
| |
| /* |
|  * Account system CPU time to a process and desired cpustat field |
|  * @p: the process that the CPU time gets accounted to |
|  * @cputime: the CPU time spent in kernel space since the last update |
|  * @index: pointer to cpustat field that has to be updated |
|  */ |
| void account_system_index_time(struct task_struct *p, |
| <------><------><------> u64 cputime, enum cpu_usage_stat index) |
| { |
| <------>/* Add system time to process. */ |
| <------>p->stime += cputime; |
| <------>account_group_system_time(p, cputime); |
| |
| <------>/* Add system time to cpustat. */ |
| <------>task_group_account_field(p, index, cputime); |
| |
| <------>/* Account for system time used */ |
| <------>acct_account_cputime(p); |
| |
| <------>/* Account power usage for system time */ |
| <------>cpufreq_acct_update_power(p, cputime); |
| } |
| |
| /* |
|  * Account system CPU time to a process. |
|  * @p: the process that the CPU time gets accounted to |
|  * @hardirq_offset: the offset to subtract from hardirq_count() |
|  * @cputime: the CPU time spent in kernel space since the last update |
|  */ |
| void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) |
| { |
| <------>int index; |
| |
| <------>if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { |
| <------><------>account_guest_time(p, cputime); |
| <------><------>return; |
| <------>} |
| |
| <------>if (hardirq_count() - hardirq_offset) |
| <------><------>index = CPUTIME_IRQ; |
| <------>else if (in_serving_softirq()) |
| <------><------>index = CPUTIME_SOFTIRQ; |
| <------>else |
| <------><------>index = CPUTIME_SYSTEM; |
| |
| <------>account_system_index_time(p, cputime, index); |
| } |
| |
| /* |
|  * Account for involuntary wait time. |
|  * @cputime: the CPU time spent in involuntary wait |
|  */ |
| void account_steal_time(u64 cputime) |
| { |
| <------>u64 *cpustat = kcpustat_this_cpu->cpustat; |
| |
| <------>cpustat[CPUTIME_STEAL] += cputime; |
| } |
| |
| /* |
|  * Account for idle time. |
|  * @cputime: the CPU time spent in idle wait |
|  */ |
| void account_idle_time(u64 cputime) |
| { |
| <------>u64 *cpustat = kcpustat_this_cpu->cpustat; |
| <------>struct rq *rq = this_rq(); |
| |
| <------>if (atomic_read(&rq->nr_iowait) > 0) |
| <------><------>cpustat[CPUTIME_IOWAIT] += cputime; |
| <------>else |
| <------><------>cpustat[CPUTIME_IDLE] += cputime; |
| } |
| |
| /* |
|  * When a guest is interrupted for a longer amount of time, missed clock |
|  * ticks are not redelivered later. Due to that, this function may on |
|  * occasion account more time than the calling functions think elapsed. |
|  */ |
| static __always_inline u64 steal_account_process_time(u64 maxtime) |
| { |
| #ifdef CONFIG_PARAVIRT |
| <------>if (static_key_false(¶virt_steal_enabled)) { |
| <------><------>u64 steal; |
| |
| <------><------>steal = paravirt_steal_clock(smp_processor_id()); |
| <------><------>steal -= this_rq()->prev_steal_time; |
| <------><------>steal = min(steal, maxtime); |
| <------><------>account_steal_time(steal); |
| <------><------>this_rq()->prev_steal_time += steal; |
| |
| <------><------>return steal; |
| <------>} |
| #endif |
| <------>return 0; |
| } |
| |
| /* |
|  * Account how much elapsed time was spent in steal, irq, or softirq time. |
|  */ |
| static inline u64 account_other_time(u64 max) |
| { |
| <------>u64 accounted; |
| |
| <------>lockdep_assert_irqs_disabled(); |
| |
| <------>accounted = steal_account_process_time(max); |
| |
| <------>if (accounted < max) |
| <------><------>accounted += irqtime_tick_accounted(max - accounted); |
| |
| <------>return accounted; |
| } |
| |
| #ifdef CONFIG_64BIT |
| static inline u64 read_sum_exec_runtime(struct task_struct *t) |
| { |
| <------>return t->se.sum_exec_runtime; |
| } |
| #else |
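| /* |
|  * On 32-bit kernels a 64-bit sum_exec_runtime cannot be read atomically, |
|  * so take the task's rq lock to get a consistent value that cannot tear |
|  * against a concurrent update by the scheduler. |
|  */ |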
| static u64 read_sum_exec_runtime(struct task_struct *t) |
| { |
| <------>u64 ns; |
| <------>struct rq_flags rf; |
| <------>struct rq *rq; |
| |
| <------>rq = task_rq_lock(t, &rf); |
| <------>ns = t->se.sum_exec_runtime; |
| <------>task_rq_unlock(rq, t, &rf); |
| |
| <------>return ns; |
| } |
| #endif |
| |
| /* |
|  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live |
|  * tasks (sum on group iteration) belonging to @tsk's group. |
|  */ |
| void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) |
| { |
| <------>struct signal_struct *sig = tsk->signal; |
| <------>u64 utime, stime; |
| <------>struct task_struct *t; |
| <------>unsigned int seq, nextseq; |
| <------>unsigned long flags; |
| |
| <------>/* |
| <------> * Update current task runtime to account pending time since last |
| <------> * scheduler action or thread_group_cputime() call. This thread group |
| <------> * might have other running tasks on different CPUs, but updating |
| <------> * their runtime can affect syscall performance, so we skip accounting |
| <------> * those pending times and rely only on values updated on tick or |
| <------> * other scheduler action. |
| <------> */ |
| <------>if (same_thread_group(current, tsk)) |
| <------><------>(void) task_sched_runtime(current); |
| |
| <------>rcu_read_lock(); |
| <------>/* Attempt a lockless read on the first round. */ |
| <------>nextseq = 0; |
| <------>do { |
| <------><------>seq = nextseq; |
| <------><------>flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); |
| <------><------>times->utime = sig->utime; |
| <------><------>times->stime = sig->stime; |
| <------><------>times->sum_exec_runtime = sig->sum_sched_runtime; |
| |
| <------><------>for_each_thread(tsk, t) { |
| <------><------><------>task_cputime(t, &utime, &stime); |
| <------><------><------>times->utime += utime; |
| <------><------><------>times->stime += stime; |
| <------><------><------>times->sum_exec_runtime += read_sum_exec_runtime(t); |
| <------><------>} |
| <------><------>/* If lockless access failed, take the lock. */ |
| <------><------>nextseq = 1; |
| <------>} while (need_seqretry(&sig->stats_lock, seq)); |
| <------>done_seqretry_irqrestore(&sig->stats_lock, seq, flags); |
| <------>rcu_read_unlock(); |
| } |
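| |
| /* |
|  * Usage sketch (illustrative, not part of the original file): callers |
|  * typically feed the accumulated totals straight into cputime reporting. |
|  * thread_group_runtime_ns() is a hypothetical helper name. |
|  */ |
| static inline u64 thread_group_runtime_ns(struct task_struct *tsk) |
| { |
| <------>struct task_cputime times; |
| |
| <------>thread_group_cputime(tsk, &times); |
| <------>return times.sum_exec_runtime; |
| } |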
| |
| #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
| /* |
|  * Account a tick to a process and cpustat |
|  * @p: the process that the CPU time gets accounted to |
|  * @user_tick: is the tick from userspace |
|  * @ticks: number of ticks being accounted |
|  * |
|  * Steal, hardirq and softirq time pending on this CPU is deducted first |
|  * (see account_other_time()); the remainder is then accounted, in order, |
|  * to: |
|  * - ksoftirqd (as softirq time) |
|  * - user time |
|  * - idle time |
|  * - guest time (PF_VCPU) |
|  * - system time |
|  */ |
| static void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
| <------><------><------><------><------> int ticks) |
| { |
| <------>u64 other, cputime = TICK_NSEC * ticks; |
| |
| <------>/* |
| <------> * When returning from idle, many ticks can get accounted at |
| <------> * once, including some ticks of steal, irq, and softirq time. |
| <------> * Subtract those ticks from the amount of time accounted to |
| <------> * idle, or potentially user or system time. Due to rounding, |
| <------> * other time can exceed ticks occasionally. |
| <------> */ |
| <------>other = account_other_time(ULONG_MAX); |
| <------>if (other >= cputime) |
| <------><------>return; |
| |
| <------>cputime -= other; |
| |
| <------>if (this_cpu_ksoftirqd() == p) { |
| <------><------>/* |
| <------><------> * ksoftirqd time does not get accounted in cpu_softirq_time. |
| <------><------> * So, we have to handle it separately here. |
| <------><------> * Also, p->stime needs to be updated for ksoftirqd. |
| <------><------> */ |
| <------><------>account_system_index_time(p, cputime, CPUTIME_SOFTIRQ); |
| <------>} else if (user_tick) { |
| <------><------>account_user_time(p, cputime); |
| <------>} else if (p == this_rq()->idle) { |
| <------><------>account_idle_time(cputime); |
| <------>} else if (p->flags & PF_VCPU) { |
| <------><------>account_guest_time(p, cputime); |
| <------>} else { |
| <------><------>account_system_index_time(p, cputime, CPUTIME_SYSTEM); |
| <------>} |
| <------>trace_android_vh_irqtime_account_process_tick(p, this_rq(), user_tick, ticks); |
| } |
| |
| static void irqtime_account_idle_ticks(int ticks) |
| { |
| <------>irqtime_account_process_tick(current, 0, ticks); |
| } |
| #else |
| static inline void irqtime_account_idle_ticks(int ticks) { } |
| static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
| <------><------><------><------><------><------>int nr_ticks) { } |
| #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| |
| /* |
|  * Use precise platform statistics if available: |
|  */ |
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE |
| |
| # ifndef __ARCH_HAS_VTIME_TASK_SWITCH |
| void vtime_task_switch(struct task_struct *prev) |
| { |
| <------>if (is_idle_task(prev)) |
| <------><------>vtime_account_idle(prev); |
| <------>else |
| <------><------>vtime_account_kernel(prev); |
| |
| <------>vtime_flush(prev); |
| <------>arch_vtime_task_switch(prev); |
| } |
| # endif |
| |
| /* |
|  * Archs that account the whole time spent in the idle task |
|  * (outside irq) as idle time can rely on this and just implement |
|  * vtime_account_kernel() and vtime_account_idle(). Archs that |
|  * have other meaning of the idle time (s390 only accounts the |
|  * time spent by the CPU when it's in low power mode) must override |
|  * vtime_account(). |
|  */ |
| #ifndef __ARCH_HAS_VTIME_ACCOUNT |
| void vtime_account_irq_enter(struct task_struct *tsk) |
| { |
| <------>if (!in_interrupt() && is_idle_task(tsk)) |
| <------><------>vtime_account_idle(tsk); |
| <------>else |
| <------><------>vtime_account_kernel(tsk); |
| } |
| EXPORT_SYMBOL_GPL(vtime_account_irq_enter); |
| #endif |
| |
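| /* |
|  * With CONFIG_VIRT_CPU_ACCOUNTING_NATIVE the architecture accounts |
|  * utime/stime precisely, so no tick-based adjustment is needed and the |
|  * adjusted values are simply the raw ones. |
|  */ |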
| void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, |
| <------><------> u64 *ut, u64 *st) |
| { |
| <------>*ut = curr->utime; |
| <------>*st = curr->stime; |
| } |
| |
| void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
| { |
| <------>*ut = p->utime; |
| <------>*st = p->stime; |
| } |
| EXPORT_SYMBOL_GPL(task_cputime_adjusted); |
| |
| void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
| { |
| <------>struct task_cputime cputime; |
| |
| <------>thread_group_cputime(p, &cputime); |
| |
| <------>*ut = cputime.utime; |
| <------>*st = cputime.stime; |
| } |
| EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted); |
| |
| #else |
| |
| /* |
|  * Account a single tick of CPU time. |
|  * @p: the process that the CPU time gets accounted to |
|  * @user_tick: indicates if the tick is a user or a system tick |
|  */ |
| void account_process_tick(struct task_struct *p, int user_tick) |
| { |
| <------>u64 cputime, steal; |
| |
| <------>if (vtime_accounting_enabled_this_cpu()) |
| <------><------>return; |
| <------>trace_android_vh_account_task_time(p, this_rq(), user_tick); |
| |
| <------>if (sched_clock_irqtime) { |
| <------><------>irqtime_account_process_tick(p, user_tick, 1); |
| <------><------>return; |
| <------>} |
| |
| <------>cputime = TICK_NSEC; |
| <------>steal = steal_account_process_time(ULONG_MAX); |
| |
| <------>if (steal >= cputime) |
| <------><------>return; |
| |
| <------>cputime -= steal; |
| |
| <------>if (user_tick) |
| <------><------>account_user_time(p, cputime); |
| <------>else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET)) |
| <------><------>account_system_time(p, HARDIRQ_OFFSET, cputime); |
| <------>else |
| <------><------>account_idle_time(cputime); |
| } |
| |
| /* |
|  * Account multiple ticks of idle time. |
|  * @ticks: number of stolen ticks |
|  */ |
| void account_idle_ticks(unsigned long ticks) |
| { |
| <------>u64 cputime, steal; |
| |
| <------>if (sched_clock_irqtime) { |
| <------><------>irqtime_account_idle_ticks(ticks); |
| <------><------>return; |
| <------>} |
| |
| <------>cputime = ticks * TICK_NSEC; |
| <------>steal = steal_account_process_time(ULONG_MAX); |
| |
| <------>if (steal >= cputime) |
| <------><------>return; |
| |
| <------>cputime -= steal; |
| <------>account_idle_time(cputime); |
| } |
| |
| /* |
|  * Adjust tick based cputime random precision against scheduler runtime |
|  * accounting. |
|  * |
|  * Tick based cputime accounting depends on random scheduling timeslices |
|  * of a task to be interrupted or not by the timer. Depending on these |
|  * circumstances, the number of these interrupts may be over or |
|  * under-optimistic, matching the real user and system cputime with a |
|  * variable precision. |
|  * |
|  * Fix this by scaling these tick based values against the total runtime |
|  * accounted by the CFS scheduler. |
|  * |
|  * This code provides the following guarantees: |
|  * |
|  *   stime + utime == rtime |
|  *   stime_i+1 >= stime_i, utime_i+1 >= utime_i |
|  * |
|  * Assuming that rtime_i+1 >= rtime_i. |
|  */ |
| void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, |
| <------><------> u64 *ut, u64 *st) |
| { |
| <------>u64 rtime, stime, utime; |
| <------>unsigned long flags; |
| |
| <------>/* Serialize concurrent callers such that we can honour our guarantees */ |
| <------>raw_spin_lock_irqsave(&prev->lock, flags); |
| <------>rtime = curr->sum_exec_runtime; |
| |
| <------>/* |
| <------> * This is possible under two circumstances: |
| <------> * - rtime isn't monotonic after all (a bug); |
| <------> * - we got reordered by the lock. |
| <------> * |
| <------> * In both cases this acts as a filter such that the rest of the code |
| <------> * can assume it is monotonic regardless of anything else. |
| <------> */ |
| <------>if (prev->stime + prev->utime >= rtime) |
| <------><------>goto out; |
| |
| <------>stime = curr->stime; |
| <------>utime = curr->utime; |
| |
| <------>/* |
| <------> * If either stime or utime are 0, assume all runtime is userspace. |
| <------> * Once a task gets some ticks, the monotonicity code at 'update:' |
| <------> * will ensure things converge to the observed ratio. |
| <------> */ |
| <------>if (stime == 0) { |
| <------><------>utime = rtime; |
| <------><------>goto update; |
| <------>} |
| |
| <------>if (utime == 0) { |
| <------><------>stime = rtime; |
| <------><------>goto update; |
| <------>} |
| |
| <------>stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); |
| |
| update: |
| <------>/* |
| <------> * Make sure stime doesn't go backwards; this preserves monotonicity |
| <------> * for utime because rtime is monotonic. |
| <------> * |
| <------> * utime_i+1 = rtime_i+1 - stime_i |
| <------> * = rtime_i+1 - (rtime_i - utime_i) |
| <------> * = (rtime_i+1 - rtime_i) + utime_i |
| <------> * >= utime_i |
| <------> */ |
| <------>if (stime < prev->stime) |
| <------><------>stime = prev->stime; |
| <------>utime = rtime - stime; |
| |
| <------>/* |
| <------> * Make sure utime doesn't go backwards; this still preserves |
| <------> * monotonicity for stime, analogous argument to above. |
| <------> */ |
| <------>if (utime < prev->utime) { |
| <------><------>utime = prev->utime; |
| <------><------>stime = rtime - utime; |
| <------>} |
| |
| <------>prev->stime = stime; |
| <------>prev->utime = utime; |
| out: |
| <------>*ut = prev->utime; |
| <------>*st = prev->stime; |
| <------>raw_spin_unlock_irqrestore(&prev->lock, flags); |
| } |
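| |
| /* |
|  * Worked example (illustrative): suppose the tick samples so far show |
|  * stime = 20ms and utime = 60ms, but the scheduler measured |
|  * rtime = 100ms. Then: |
|  * |
|  *   stime' = 20 * 100 / (20 + 60) = 25ms |
|  *   utime' = 100 - 25            = 75ms |
|  * |
|  * The observed 1:3 system/user ratio is preserved while the sum now |
|  * matches the precisely measured runtime, and the prev->stime/utime |
|  * clamps keep both values monotonic across successive calls. |
|  */ |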
| |
| void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
| { |
| <------>struct task_cputime cputime = { |
| <------><------>.sum_exec_runtime = p->se.sum_exec_runtime, |
| <------>}; |
| |
| <------>task_cputime(p, &cputime.utime, &cputime.stime); |
| <------>cputime_adjust(&cputime, &p->prev_cputime, ut, st); |
| } |
| EXPORT_SYMBOL_GPL(task_cputime_adjusted); |
| |
| void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) |
| { |
| <------>struct task_cputime cputime; |
| |
| <------>thread_group_cputime(p, &cputime); |
| <------>cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); |
| } |
| EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted); |
| |
| #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ |
| |
| #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN |
| static u64 vtime_delta(struct vtime *vtime) |
| { |
| <------>unsigned long long clock; |
| |
| <------>clock = sched_clock(); |
| <------>if (clock < vtime->starttime) |
| <------><------>return 0; |
| |
| <------>return clock - vtime->starttime; |
| } |
| |
| static u64 get_vtime_delta(struct vtime *vtime) |
| { |
| <------>u64 delta = vtime_delta(vtime); |
| <------>u64 other; |
| |
| <------>/* |
| <------> * Unlike tick based timing, vtime based timing never has lost |
| <------> * ticks, and no need for steal time accounting to make up for |
| <------> * lost ticks. Vtime accounts a rounded version of actual |
| <------> * elapsed time. Limit account_other_time to prevent rounding |
| <------> * errors from causing elapsed vtime to go negative. |
| <------> */ |
| <------>other = account_other_time(delta); |
| <------>WARN_ON_ONCE(vtime->state == VTIME_INACTIVE); |
| <------>vtime->starttime += delta; |
| |
| <------>return delta - other; |
| } |
| |
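| /* |
|  * The helpers below batch vtime into the task's fields and only flush to |
|  * the global cpustat once at least a full tick's worth (TICK_NSEC) has |
|  * accumulated, keeping accounting cheap on nohz_full CPUs. |
|  */ |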
| static void vtime_account_system(struct task_struct *tsk, |
| <------><------><------><------> struct vtime *vtime) |
| { |
| <------>vtime->stime += get_vtime_delta(vtime); |
| <------>if (vtime->stime >= TICK_NSEC) { |
| <------><------>account_system_time(tsk, irq_count(), vtime->stime); |
| <------><------>vtime->stime = 0; |
| <------>} |
| } |
| |
| static void vtime_account_guest(struct task_struct *tsk, |
| <------><------><------><------>struct vtime *vtime) |
| { |
| <------>vtime->gtime += get_vtime_delta(vtime); |
| <------>if (vtime->gtime >= TICK_NSEC) { |
| <------><------>account_guest_time(tsk, vtime->gtime); |
| <------><------>vtime->gtime = 0; |
| <------>} |
| } |
| |
| static void __vtime_account_kernel(struct task_struct *tsk, |
| <------><------><------><------> struct vtime *vtime) |
| { |
| <------>/* We might have scheduled out from guest path */ |
| <------>if (vtime->state == VTIME_GUEST) |
| <------><------>vtime_account_guest(tsk, vtime); |
| <------>else |
| <------><------>vtime_account_system(tsk, vtime); |
| } |
| |
| void vtime_account_kernel(struct task_struct *tsk) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| |
| <------>if (!vtime_delta(vtime)) |
| <------><------>return; |
| |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>__vtime_account_kernel(tsk, vtime); |
| <------>write_seqcount_end(&vtime->seqcount); |
| } |
| |
| void vtime_user_enter(struct task_struct *tsk) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>vtime_account_system(tsk, vtime); |
| <------>vtime->state = VTIME_USER; |
| <------>write_seqcount_end(&vtime->seqcount); |
| } |
| |
| void vtime_user_exit(struct task_struct *tsk) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>vtime->utime += get_vtime_delta(vtime); |
| <------>if (vtime->utime >= TICK_NSEC) { |
| <------><------>account_user_time(tsk, vtime->utime); |
| <------><------>vtime->utime = 0; |
| <------>} |
| <------>vtime->state = VTIME_SYS; |
| <------>write_seqcount_end(&vtime->seqcount); |
| } |
| |
| void vtime_guest_enter(struct task_struct *tsk) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| |
| <------>/* |
| <------> * The flags must be updated under the lock with |
| <------> * the vtime_starttime flush and update. |
| <------> * That enforces a right ordering and update sequence |
| <------> * synchronization against the reader (task_gtime()) |
| <------> * that can thus safely catch up with a tickless delta. |
| <------> */ |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>vtime_account_system(tsk, vtime); |
| <------>tsk->flags |= PF_VCPU; |
| <------>vtime->state = VTIME_GUEST; |
| <------>write_seqcount_end(&vtime->seqcount); |
| } |
| EXPORT_SYMBOL_GPL(vtime_guest_enter); |
| |
| void vtime_guest_exit(struct task_struct *tsk) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>vtime_account_guest(tsk, vtime); |
| <------>tsk->flags &= ~PF_VCPU; |
| <------>vtime->state = VTIME_SYS; |
| <------>write_seqcount_end(&vtime->seqcount); |
| } |
| EXPORT_SYMBOL_GPL(vtime_guest_exit); |
| |
| void vtime_account_idle(struct task_struct *tsk) |
| { |
| <------>account_idle_time(get_vtime_delta(&tsk->vtime)); |
| } |
| |
| void vtime_task_switch_generic(struct task_struct *prev) |
| { |
| <------>struct vtime *vtime = &prev->vtime; |
| |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>if (vtime->state == VTIME_IDLE) |
| <------><------>vtime_account_idle(prev); |
| <------>else |
| <------><------>__vtime_account_kernel(prev, vtime); |
| <------>vtime->state = VTIME_INACTIVE; |
| <------>vtime->cpu = -1; |
| <------>write_seqcount_end(&vtime->seqcount); |
| |
| <------>vtime = ¤t->vtime; |
| |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>if (is_idle_task(current)) |
| <------><------>vtime->state = VTIME_IDLE; |
| <------>else if (current->flags & PF_VCPU) |
| <------><------>vtime->state = VTIME_GUEST; |
| <------>else |
| <------><------>vtime->state = VTIME_SYS; |
| <------>vtime->starttime = sched_clock(); |
| <------>vtime->cpu = smp_processor_id(); |
| <------>write_seqcount_end(&vtime->seqcount); |
| } |
| |
| void vtime_init_idle(struct task_struct *t, int cpu) |
| { |
| <------>struct vtime *vtime = &t->vtime; |
| <------>unsigned long flags; |
| |
| <------>local_irq_save(flags); |
| <------>write_seqcount_begin(&vtime->seqcount); |
| <------>vtime->state = VTIME_IDLE; |
| <------>vtime->starttime = sched_clock(); |
| <------>vtime->cpu = cpu; |
| <------>write_seqcount_end(&vtime->seqcount); |
| <------>local_irq_restore(flags); |
| } |
| |
| u64 task_gtime(struct task_struct *t) |
| { |
| <------>struct vtime *vtime = &t->vtime; |
| <------>unsigned int seq; |
| <------>u64 gtime; |
| |
| <------>if (!vtime_accounting_enabled()) |
| <------><------>return t->gtime; |
| |
| <------>do { |
| <------><------>seq = read_seqcount_begin(&vtime->seqcount); |
| |
| <------><------>gtime = t->gtime; |
| <------><------>if (vtime->state == VTIME_GUEST) |
| <------><------><------>gtime += vtime->gtime + vtime_delta(vtime); |
| |
| <------>} while (read_seqcount_retry(&vtime->seqcount, seq)); |
| |
| <------>return gtime; |
| } |
| |
| /* |
|  * Fetch cputime raw values from fields of task_struct and |
|  * add up the pending nohz execution time since the last |
|  * cputime snapshot. |
|  */ |
| void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) |
| { |
| <------>struct vtime *vtime = &t->vtime; |
| <------>unsigned int seq; |
| <------>u64 delta; |
| |
| <------>if (!vtime_accounting_enabled()) { |
| <------><------>*utime = t->utime; |
| <------><------>*stime = t->stime; |
| <------><------>return; |
| <------>} |
| |
| <------>do { |
| <------><------>seq = read_seqcount_begin(&vtime->seqcount); |
| |
| <------><------>*utime = t->utime; |
| <------><------>*stime = t->stime; |
| |
| <------><------>/* Task is sleeping or idle, nothing to add */ |
| <------><------>if (vtime->state < VTIME_SYS) |
| <------><------><------>continue; |
| |
| <------><------>delta = vtime_delta(vtime); |
| |
| <------><------>/* |
| <------><------> * Task runs either in user (including guest) or kernel space, |
| <------><------> * add pending nohz time to the right place. |
| <------><------> */ |
| <------><------>if (vtime->state == VTIME_SYS) |
| <------><------><------>*stime += vtime->stime + delta; |
| <------><------>else |
| <------><------><------>*utime += vtime->utime + delta; |
| <------>} while (read_seqcount_retry(&vtime->seqcount, seq)); |
| } |
| |
| static int vtime_state_fetch(struct vtime *vtime, int cpu) |
| { |
| <------>int state = READ_ONCE(vtime->state); |
| |
| <------>/* |
| <------> * We raced against a context switch, fetch the |
| <------> * kcpustat task again. |
| <------> */ |
| <------>if (vtime->cpu != cpu && vtime->cpu != -1) |
| <------><------>return -EAGAIN; |
| |
| <------>/* |
| <------> * Two possible things here: |
| <------> * 1) We are seeing the scheduling out task (prev) or any past one. |
| <------> * 2) We are seeing the scheduling in task (next) but it hasn't |
| <------> * passed through vtime_task_switch() yet so the pending |
| <------> * cputime of the prev task may not be flushed yet. |
| <------> * |
| <------> * Case 1) is ok but 2) is not. So wait for a safe VTIME state. |
| <------> */ |
| <------>if (state == VTIME_INACTIVE) |
| <------><------>return -EAGAIN; |
| |
| <------>return state; |
| } |
| |
| static u64 kcpustat_user_vtime(struct vtime *vtime) |
| { |
| <------>if (vtime->state == VTIME_USER) |
| <------><------>return vtime->utime + vtime_delta(vtime); |
| <------>else if (vtime->state == VTIME_GUEST) |
| <------><------>return vtime->gtime + vtime_delta(vtime); |
| <------>return 0; |
| } |
| |
| static int kcpustat_field_vtime(u64 *cpustat, |
| <------><------><------><------>struct task_struct *tsk, |
| <------><------><------><------>enum cpu_usage_stat usage, |
| <------><------><------><------>int cpu, u64 *val) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| <------>unsigned int seq; |
| |
| <------>do { |
| <------><------>int state; |
| |
| <------><------>seq = read_seqcount_begin(&vtime->seqcount); |
| |
| <------><------>state = vtime_state_fetch(vtime, cpu); |
| <------><------>if (state < 0) |
| <------><------><------>return state; |
| |
| <------><------>*val = cpustat[usage]; |
| |
| <------><------>/* |
| <------><------> * Nice VS unnice cputime accounting may be inaccurate if |
| <------><------> * the nice value has changed since the last vtime update. |
| <------><------> * But proper fix would involve interrupting target on nice |
| <------><------> * updates which is a no go on nohz_full (although the scheduler |
| <------><------> * may still interrupt the target if rescheduling is needed...) |
| <------><------> */ |
| <------><------>switch (usage) { |
| <------><------>case CPUTIME_SYSTEM: |
| <------><------><------>if (state == VTIME_SYS) |
| <------><------><------><------>*val += vtime->stime + vtime_delta(vtime); |
| <------><------><------>break; |
| <------><------>case CPUTIME_USER: |
| <------><------><------>if (task_nice(tsk) <= 0) |
| <------><------><------><------>*val += kcpustat_user_vtime(vtime); |
| <------><------><------>break; |
| <------><------>case CPUTIME_NICE: |
| <------><------><------>if (task_nice(tsk) > 0) |
| <------><------><------><------>*val += kcpustat_user_vtime(vtime); |
| <------><------><------>break; |
| <------><------>case CPUTIME_GUEST: |
| <------><------><------>if (state == VTIME_GUEST && task_nice(tsk) <= 0) |
| <------><------><------><------>*val += vtime->gtime + vtime_delta(vtime); |
| <------><------><------>break; |
| <------><------>case CPUTIME_GUEST_NICE: |
| <------><------><------>if (state == VTIME_GUEST && task_nice(tsk) > 0) |
| <------><------><------><------>*val += vtime->gtime + vtime_delta(vtime); |
| <------><------><------>break; |
| <------><------>default: |
| <------><------><------>break; |
| <------><------>} |
| <------>} while (read_seqcount_retry(&vtime->seqcount, seq)); |
| |
| <------>return 0; |
| } |
| |
| u64 kcpustat_field(struct kernel_cpustat *kcpustat, |
| <------><------> enum cpu_usage_stat usage, int cpu) |
| { |
| <------>u64 *cpustat = kcpustat->cpustat; |
| <------>u64 val = cpustat[usage]; |
| <------>struct rq *rq; |
| <------>int err; |
| |
| <------>if (!vtime_accounting_enabled_cpu(cpu)) |
| <------><------>return val; |
| |
| <------>rq = cpu_rq(cpu); |
| |
| <------>for (;;) { |
| <------><------>struct task_struct *curr; |
| |
| <------><------>rcu_read_lock(); |
| <------><------>curr = rcu_dereference(rq->curr); |
| <------><------>if (WARN_ON_ONCE(!curr)) { |
| <------><------><------>rcu_read_unlock(); |
| <------><------><------>return cpustat[usage]; |
| <------><------>} |
| |
| <------><------>err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val); |
| <------><------>rcu_read_unlock(); |
| |
| <------><------>if (!err) |
| <------><------><------>return val; |
| |
| <------><------>cpu_relax(); |
| <------>} |
| } |
| EXPORT_SYMBOL_GPL(kcpustat_field); |
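| |
| /* |
|  * Usage sketch (illustrative, not part of the original file): read one |
|  * cpustat field coherently even while the target CPU runs tickless. |
|  * example_cpu_idle_ns() is a hypothetical helper name. |
|  */ |
| static inline u64 example_cpu_idle_ns(int cpu) |
| { |
| <------>return kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_IDLE, cpu); |
| } |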
| |
| static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst, |
| <------><------><------><------> const struct kernel_cpustat *src, |
| <------><------><------><------> struct task_struct *tsk, int cpu) |
| { |
| <------>struct vtime *vtime = &tsk->vtime; |
| <------>unsigned int seq; |
| |
| <------>do { |
| <------><------>u64 *cpustat; |
| <------><------>u64 delta; |
| <------><------>int state; |
| |
| <------><------>seq = read_seqcount_begin(&vtime->seqcount); |
| |
| <------><------>state = vtime_state_fetch(vtime, cpu); |
| <------><------>if (state < 0) |
| <------><------><------>return state; |
| |
| <------><------>*dst = *src; |
| <------><------>cpustat = dst->cpustat; |
| |
| <------><------>/* Task is idle, nothing to add */ |
| <------><------>if (state < VTIME_SYS) |
| <------><------><------>continue; |
| |
| <------><------>delta = vtime_delta(vtime); |
| |
| <------><------>/* |
| <------><------> * Task runs either in user (including guest) or kernel space, |
| <------><------> * add pending nohz time to the right place. |
| <------><------> */ |
| <------><------>if (state == VTIME_SYS) { |
| <------><------><------>cpustat[CPUTIME_SYSTEM] += vtime->stime + delta; |
| <------><------>} else if (state == VTIME_USER) { |
| <------><------><------>if (task_nice(tsk) > 0) |
| <------><------><------><------>cpustat[CPUTIME_NICE] += vtime->utime + delta; |
| <------><------><------>else |
| <------><------><------><------>cpustat[CPUTIME_USER] += vtime->utime + delta; |
| <------><------>} else { |
| <------><------><------>WARN_ON_ONCE(state != VTIME_GUEST); |
| <------><------><------>if (task_nice(tsk) > 0) { |
| <------><------><------><------>cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta; |
| <------><------><------><------>cpustat[CPUTIME_NICE] += vtime->gtime + delta; |
| <------><------><------>} else { |
| <------><------><------><------>cpustat[CPUTIME_GUEST] += vtime->gtime + delta; |
| <------><------><------><------>cpustat[CPUTIME_USER] += vtime->gtime + delta; |
| <------><------><------>} |
| <------><------>} |
| <------>} while (read_seqcount_retry(&vtime->seqcount, seq)); |
| |
| <------>return 0; |
| } |
| |
| void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) |
| { |
| <------>const struct kernel_cpustat *src = &kcpustat_cpu(cpu); |
| <------>struct rq *rq; |
| <------>int err; |
| |
| <------>if (!vtime_accounting_enabled_cpu(cpu)) { |
| <------><------>*dst = *src; |
| <------><------>return; |
| <------>} |
| |
| <------>rq = cpu_rq(cpu); |
| |
| <------>for (;;) { |
| <------><------>struct task_struct *curr; |
| |
| <------><------>rcu_read_lock(); |
| <------><------>curr = rcu_dereference(rq->curr); |
| <------><------>if (WARN_ON_ONCE(!curr)) { |
| <------><------><------>rcu_read_unlock(); |
| <------><------><------>*dst = *src; |
| <------><------><------>return; |
| <------><------>} |
| |
| <------><------>err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu); |
| <------><------>rcu_read_unlock(); |
| |
| <------><------>if (!err) |
| <------><------><------>return; |
| |
| <------><------>cpu_relax(); |
| <------>} |
| } |
| EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch); |
| |
| #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ |
| |