[linux-2.6-block.git] / arch / x86 / lib / copy_user_64.S
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
 * By placing feature2 after feature1 in the .altinstructions section,
 * we logically implement:
 *	if the CPU has feature2, the jmp to alt2 is used;
 *	else if the CPU has feature1, the jmp to alt1 is used;
 *	else the jmp to orig is used.
 */
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt1-1b /* offset */	/* or alternatively to alt1 */
3:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt2-1b /* offset */	/* or alternatively to alt2 */
	.previous

	.section .altinstructions,"a"
	altinstruction_entry 0b,2b,\feature1,5,5
	altinstruction_entry 0b,3b,\feature2,5,5
	.previous
	.endm
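
As a rough illustration only, the run-time effect of ALTERNATIVE_JUMP is a
three-way priority selection.  The C sketch below models that order with
hypothetical has_erms/has_rep_good flags and stub copy routines; the real
kernel patches the jump once at boot through .altinstructions instead of
testing flags on every call.

#include <string.h>

/* Hypothetical flags standing in for X86_FEATURE_ERMS and
 * X86_FEATURE_REP_GOOD. */
static int has_erms, has_rep_good;

typedef unsigned long (*copy_fn)(void *to, const void *from, unsigned len);

/* Stand-ins for the three assembly routines in this file. */
static unsigned long copy_unrolled(void *to, const void *from, unsigned len)
{ memcpy(to, from, len); return 0; }
static unsigned long copy_string(void *to, const void *from, unsigned len)
{ memcpy(to, from, len); return 0; }
static unsigned long copy_erms(void *to, const void *from, unsigned len)
{ memcpy(to, from, len); return 0; }

/* Selection order encoded by ALTERNATIVE_JUMP: feature2 (ERMS) beats
 * feature1 (REP_GOOD); the original jump target is the fallback. */
static copy_fn pick_copy_user(void)
{
	if (has_erms)
		return copy_erms;
	if (has_rep_good)
		return copy_string;
	return copy_unrolled;
}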

	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 100b,103b
	.quad 101b,103b
	.previous
#endif
	.endm
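
For readers following the register math, here is a C sketch of what
ALIGN_DESTINATION computes, assuming (as its callers guarantee) that at
least 8 bytes remain; the names are illustrative, not kernel API.

#include <stdint.h>
#include <stddef.h>

/* C sketch of ALIGN_DESTINATION: byte-copy until the destination hits an
 * 8-byte boundary and charge those bytes against the remaining count.
 * Callers guarantee count >= 8, so no clamping is needed. */
static void align_destination(unsigned char **dst, const unsigned char **src,
			      size_t *count)
{
	size_t misalign = (uintptr_t)*dst & 7;		/* andl $7,%ecx */
	size_t head = misalign ? 8 - misalign : 0;	/* subl $8 / negl */

	*count -= head;					/* subl %ecx,%edx */
	while (head--)					/* the 100:/101: byte loop */
		*(*dst)++ = *(*src)++;
}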

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TI_addr_limit(%rax),%rcx
	jae bad_to_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TI_addr_limit(%rax),%rcx
	jae bad_from_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)
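
Both entry points perform the same range check before dispatching.  A
minimal C sketch of that check, with addr_limit as a stand-in for the
TI_addr_limit field rather than a real interface:

#include <stdbool.h>
#include <stdint.h>

/* Reject the copy if addr + len wraps around (the jc path) or if the
 * end of the range reaches the task's address limit (the jae path). */
static bool user_range_ok(uint64_t addr, uint64_t len, uint64_t addr_limit)
{
	uint64_t end = addr + len;

	if (end < addr)		/* carry set: the range wrapped */
		return false;
	return end < addr_limit;
}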

	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
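
A C sketch of these two failure paths, which are only reached when the
range check itself fails (faults taken mid-copy go through
copy_user_handle_tail instead); it assumes %edx still holds the full
requested length at that point, as it does in the entry code above.

#include <string.h>

/* copy_from_user failure: zero-fill the kernel destination so stale
 * data is never consumed, then report everything as uncopied. */
static unsigned bad_from_user_sketch(void *to, unsigned len)
{
	memset(to, 0, len);	/* rep stosb with %eax == 0 */
	return len;		/* movl %edx,%eax */
}

/* copy_to_user failure: nothing to zero, just report the count. */
static unsigned bad_to_user_sketch(unsigned len)
{
	return len;
}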

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	lea (%rdx,%rcx,8),%rdx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,30b
	.quad 2b,30b
	.quad 3b,30b
	.quad 4b,30b
	.quad 5b,30b
	.quad 6b,30b
	.quad 7b,30b
	.quad 8b,30b
	.quad 9b,30b
	.quad 10b,30b
	.quad 11b,30b
	.quad 12b,30b
	.quad 13b,30b
	.quad 14b,30b
	.quad 15b,30b
	.quad 16b,30b
	.quad 18b,40b
	.quad 19b,40b
	.quad 21b,50b
	.quad 22b,50b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
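
To make the control flow above easier to follow, here is a C sketch of
the happy path: 64-byte blocks through eight registers, then 8-byte
words, then single bytes.  The alignment prologue and the __ex_table
fixups (which compute how many bytes were left when a fault hit) are
omitted.

#include <string.h>

/* C sketch of copy_user_generic_unrolled's main path; faults and the
 * ALIGN_DESTINATION prologue are ignored for brevity. */
static unsigned copy_unrolled_sketch(unsigned char *dst,
				     const unsigned char *src, unsigned len)
{
	while (len >= 64) {	/* labels 1:..16:, eight registers per trip */
		memcpy(dst, src, 64);
		dst += 64; src += 64; len -= 64;
	}
	while (len >= 8) {	/* labels 18:/19:, one qword per trip */
		memcpy(dst, src, 8);
		dst += 8; src += 8; len -= 8;
	}
	while (len--)		/* labels 21:/22:, byte tail */
		*dst++ = *src++;
	return 0;		/* 23: xor %eax,%eax */
}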

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Copying more would also be dangerous because both Intel and AMD have
 * errata for rep movsq with counts above 4GB. If someone feels the need
 * to lift this limit, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 4f
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
4:	xorl %eax,%eax
	ret

	.section .fixup,"ax"
11:	lea (%rdx,%rcx,8),%rcx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,11b
	.quad 3b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
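
A C sketch of the split the string version performs: the bulk as len/8
qwords via rep movsq, then the remaining len%8 bytes via rep movsb.  The
fixup at label 11 converts a leftover qword count back into bytes
(remaining*8 plus the byte tail) before handing it to
copy_user_handle_tail.

#include <string.h>

/* C sketch of copy_user_generic_string's happy path; fault handling
 * is omitted. */
static unsigned copy_string_sketch(unsigned char *dst,
				   const unsigned char *src, unsigned len)
{
	unsigned qwords = len >> 3;		/* shrl $3,%ecx */
	unsigned tail   = len & 7;		/* andl $7,%edx */

	memcpy(dst, src, (size_t)qwords * 8);	/* 1: rep movsq */
	dst += (size_t)qwords * 8;
	src += (size_t)qwords * 8;
	memcpy(dst, src, tail);			/* 3: rep movsb */
	return 0;				/* 4: xorl %eax,%eax */
}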

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB if it is enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 2f
	movl %edx,%ecx
1:	rep
	movsb
2:	xorl %eax,%eax
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	.section __ex_table,"a"
	.align 8
	.quad 1b,12b
	.previous
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)
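
With ERMS a single rep movsb is fast for all sizes, so this routine
needs neither a qword phase nor an alignment prologue; on a fault the
leftover byte count in %ecx goes straight to copy_user_handle_tail.  A
C sketch of the happy path:

#include <string.h>

/* C sketch of copy_user_enhanced_fast_string; faults are ignored. */
static unsigned copy_erms_sketch(void *dst, const void *src, unsigned len)
{
	if (len)			/* andl %edx,%edx; jz 2f */
		memcpy(dst, src, len);	/* 1: rep movsb */
	return 0;			/* 2: xorl %eax,%eax */
}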