
Commit bc6bcad

Peter Zijlstra authored and elektroschmock committed
sched, x86: Provide a per-cpu preempt_count implementation
Convert x86 to use a per-cpu preemption count. The reason for doing so
is that accessing per-cpu variables is a lot cheaper than accessing
thread_info variables.

We still need to save/restore the actual preemption count due to
PREEMPT_ACTIVE so we place the per-cpu __preempt_count variable in the
same cache-line as the other hot __switch_to() variables such as
current_task.

NOTE: this save/restore is required even for !PREEMPT kernels as
cond_resched() also relies on preempt_count's PREEMPT_ACTIVE to ignore
task_struct::state.

Also rename thread_info::preempt_count to ensure nobody is
'accidentally' still poking at it.

Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Change-Id: I69b06df535cdbbba80093ea495e0ccb0ea4417d6
1 parent 01d049b commit bc6bcad
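The payoff of combining the per-cpu count with the inverted NEED_RESCHED bit (folded in by the new asm/preempt.h below) is that a preempt_enable()-style path can drop the nesting level and check for a pending reschedule with a single decrement and zero test. A minimal user-space sketch of that arithmetic follows; it is an illustration only, and every name and constant in it is invented for the example rather than taken from the kernel.

#include <stdbool.h>
#include <stdio.h>

/* Inverted convention, as in the patch: bit set means "no reschedule pending". */
#define NEED_RESCHED_BIT 0x80000000u

/* Stand-in for the per-cpu __preempt_count; starts out "enabled, nothing pending". */
static unsigned int sim_preempt_count = 0u | NEED_RESCHED_BIT;

static void sim_set_need_resched(void) { sim_preempt_count &= ~NEED_RESCHED_BIT; }
static void sim_preempt_disable(void)  { sim_preempt_count++; }

/* One decrement, one test against zero -- the "decl; je" shape on x86. */
static bool sim_dec_and_test(void)     { return --sim_preempt_count == 0; }

static void sim_preempt_enable(void)
{
        if (sim_dec_and_test())
                puts("count hit 0: preemption allowed and a reschedule is pending");
}

int main(void)
{
        sim_preempt_disable();   /* count becomes 0x80000001 */
        sim_set_need_resched();  /* count becomes 0x00000001 */
        sim_preempt_enable();    /* decrements to 0, so the message prints */
        return 0;
}

When the decrement reaches zero, both conditions hold at once: the count portion is zero (preemption is allowed) and the inverted bit is clear (a reschedule is wanted), which is exactly what the comment in the new header describes.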

File tree: 9 files changed, +124 -16 lines


arch/x86/include/asm/preempt.h

+98
@@ -0,0 +1,98 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <linux/thread_info.h>
+
+DECLARE_PER_CPU(int, __preempt_count);
+
+/*
+ * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users
+ * that think a non-zero value indicates we cannot preempt.
+ */
+static __always_inline int preempt_count(void)
+{
+        return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static __always_inline void preempt_count_set(int pc)
+{
+        __this_cpu_write_4(__preempt_count, pc);
+}
+
+/*
+ * must be macros to avoid header recursion hell
+ */
+#define task_preempt_count(p) \
+        (task_thread_info(p)->saved_preempt_count & ~PREEMPT_NEED_RESCHED)
+
+#define init_task_preempt_count(p) do { \
+        task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+        task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \
+        per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+} while (0)
+
+/*
+ * We fold the NEED_RESCHED bit into the preempt count such that
+ * preempt_enable() can decrement and test for needing to reschedule with a
+ * single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know we both
+ * need to resched (the bit is cleared) and can resched (no preempt count).
+ */
+
+static __always_inline void set_preempt_need_resched(void)
+{
+        __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline void clear_preempt_need_resched(void)
+{
+        __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+}
+
+static __always_inline bool test_preempt_need_resched(void)
+{
+        return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+/*
+ * The various preempt_count add/sub methods
+ */
+
+static __always_inline void __preempt_count_add(int val)
+{
+        __this_cpu_add_4(__preempt_count, val);
+}
+
+static __always_inline void __preempt_count_sub(int val)
+{
+        __this_cpu_add_4(__preempt_count, -val);
+}
+
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+        GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e");
+}
+
+/*
+ * Returns true when we need to resched -- even if we can not.
+ */
+static __always_inline bool need_resched(void)
+{
+        return unlikely(test_preempt_need_resched());
+}
+
+/*
+ * Returns true when we need to resched and can (barring IRQ state).
+ */
+static __always_inline bool should_resched(void)
+{
+        return unlikely(!__this_cpu_read_4(__preempt_count));
+}
+
+#endif /* __ASM_PREEMPT_H */
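__preempt_count_dec_and_test() above relies on GEN_UNARY_RMWcc() to emit a single decl on the per-cpu word and consume the resulting zero flag. As a rough user-space illustration of that read-modify-write-plus-condition-code pattern (a sketch under stated assumptions: it operates on plain memory rather than a per-cpu segment reference, and it needs GCC's x86 flag-output asm constraints, available in roughly GCC 6 and later):

#include <stdbool.h>
#include <stdio.h>

/* Decrement *p in memory and report whether it reached zero, reading the
 * CPU's zero flag directly instead of issuing a separate compare. */
static bool dec_and_test(int *p)
{
        bool zero;

        asm ("decl %0"
             : "+m" (*p), "=@ccz" (zero));  /* ZF becomes the boolean result */
        return zero;
}

int main(void)
{
        int count = 2;

        printf("%d\n", dec_and_test(&count));  /* 2 -> 1, prints 0 */
        printf("%d\n", dec_and_test(&count));  /* 1 -> 0, prints 1 */
        return 0;
}

The kernel macro differs in that it addresses the variable through the segment-prefixed __percpu_arg(0), so the per-cpu access, the decrement and the flag test all come from the one instruction on the hot path.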

arch/x86/include/asm/thread_info.h

+2-3
@@ -28,8 +28,7 @@ struct thread_info {
         __u32                   flags;          /* low level flags */
         __u32                   status;         /* thread synchronous flags */
         __u32                   cpu;            /* current CPU */
-        int                     preempt_count;  /* 0 => preemptable,
-                                                   <0 => BUG */
+        int                     saved_preempt_count;
         mm_segment_t            addr_limit;
         void __user             *sysenter_return;
 #ifdef CONFIG_X86_32
@@ -48,7 +47,7 @@ struct thread_info {
         .exec_domain    = &default_exec_domain, \
         .flags          = 0,                    \
         .cpu            = 0,                    \
-        .preempt_count  = INIT_PREEMPT_COUNT,   \
+        .saved_preempt_count = INIT_PREEMPT_COUNT, \
         .addr_limit     = KERNEL_DS,            \
 }

arch/x86/kernel/asm-offsets.c

-1
@@ -32,7 +32,6 @@ void common(void) {
         OFFSET(TI_flags, thread_info, flags);
         OFFSET(TI_status, thread_info, status);
         OFFSET(TI_addr_limit, thread_info, addr_limit);
-        OFFSET(TI_preempt_count, thread_info, preempt_count);
 
         BLANK();
         OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);

arch/x86/kernel/cpu/common.c

+5
@@ -1100,6 +1100,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
+
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 /*
@@ -1174,6 +1177,8 @@ void debug_stack_reset(void)
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
+EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR

arch/x86/kernel/entry_32.S

+2-5
@@ -362,12 +362,9 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
         DISABLE_INTERRUPTS(CLBR_ANY)
-        cmpl $0,TI_preempt_count(%ebp)  # non-zero preempt_count ?
-        jnz restore_all
 need_resched:
-        movl TI_flags(%ebp), %ecx       # need_resched set ?
-        testb $_TIF_NEED_RESCHED, %cl
-        jz restore_all
+        cmpl $0,PER_CPU_VAR(__preempt_count)
+        jnz restore_all
         testl $X86_EFLAGS_IF,PT_EFLAGS(%esp)    # interrupts off (exception path) ?
         jz restore_all
         call preempt_schedule_irq

arch/x86/kernel/entry_64.S

+1-3
@@ -1143,10 +1143,8 @@ retint_signal:
         /* Returning to kernel space. Check if we need preemption */
         /* rcx: threadinfo. interrupts off. */
 ENTRY(retint_kernel)
-        cmpl $0,TI_preempt_count(%rcx)
+        cmpl $0,PER_CPU_VAR(__preempt_count)
         jnz  retint_restore_args
-        bt   $TIF_NEED_RESCHED,TI_flags(%rcx)
-        jnc  retint_restore_args
         bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
         jnc  retint_restore_args
         call preempt_schedule_irq

arch/x86/kernel/irq_32.c

-4
@@ -100,9 +100,6 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
         irqctx->tinfo.task = curctx->tinfo.task;
         irqctx->tinfo.previous_esp = current_stack_pointer;
 
-        /* Copy the preempt_count so that the [soft]irq checks work. */
-        irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
-
         if (unlikely(overflow))
                 call_on_stack(print_stack_overflow, isp);
 
@@ -131,7 +128,6 @@ void __cpuinit irq_ctx_init(int cpu)
                                                THREAD_SIZE_ORDER));
         memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
         irqctx->tinfo.cpu = cpu;
-        irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
         irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
 
         per_cpu(hardirq_ctx, cpu) = irqctx;

arch/x86/kernel/process_32.c

+8
@@ -286,6 +286,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
                 set_iopl_mask(next->iopl);
 
+        /*
+         * If it were not for PREEMPT_ACTIVE we could guarantee that the
+         * preempt_count of all tasks was equal here and this would not be
+         * needed.
+         */
+        task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+        this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
         /*
          * Now maybe handle debug registers and/or IO bitmaps
          */
arch/x86/kernel/process_64.c

+8
@@ -402,6 +402,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         this_cpu_write(old_rsp, next->usersp);
         this_cpu_write(current_task, next_p);
 
+        /*
+         * If it were not for PREEMPT_ACTIVE we could guarantee that the
+         * preempt_count of all tasks was equal here and this would not be
+         * needed.
+         */
+        task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
+        this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+
         this_cpu_write(kernel_stack,
                   (unsigned long)task_stack_page(next_p) +
                   THREAD_SIZE - KERNEL_STACK_OFFSET);
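The save/restore pair added to both __switch_to() implementations is the mechanical consequence of the PREEMPT_ACTIVE note in the commit message: a task can be scheduled out with a non-trivial preemption count, so that value has to travel with the task while the CPU takes on the incoming task's count. A toy user-space model of just this pattern follows; the struct and all names are invented for illustration.

#include <stdio.h>

struct toy_task {
        const char *name;
        int saved_preempt_count;   /* plays the role of thread_info::saved_preempt_count */
};

/* Plays the role of one CPU's __preempt_count. */
static int cpu_preempt_count;

static void toy_switch_to(struct toy_task *prev, struct toy_task *next)
{
        prev->saved_preempt_count = cpu_preempt_count;   /* stash the outgoing task's count */
        cpu_preempt_count = next->saved_preempt_count;   /* resume with the incoming task's count */
}

int main(void)
{
        struct toy_task a = { "A", 0 }, b = { "B", 0 };

        cpu_preempt_count = 2;     /* pretend task A is switched out with a non-zero count */
        toy_switch_to(&a, &b);
        printf("%s stashed %d; the CPU now runs %s at count %d\n",
               a.name, a.saved_preempt_count, b.name, cpu_preempt_count);
        return 0;
}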
