arch/x86/lib/copy_user_64.S

   1 /*
   2  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   3  * Copyright 2002 Andi Kleen, SuSE Labs.
   4  * Subject to the GNU Public License v2.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative-asm.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17 #include <asm/export.h>
  18
  19 /*
  20  * copy_user_generic_unrolled - memory copy with exception handling.
  21  * This version is for CPUs like P4 that don't have efficient
  22  * microcode for rep movsq
  23  *
  24  * Input:
  25  * rdi destination
  26  * rsi source
  27  * rdx count
  28  *
  29  * Output:
  30  * eax uncopied bytes or 0 if successful.
  31  */
  32 ENTRY(copy_user_generic_unrolled)
  33         ASM_STAC                /* macro defined outside this file; presumably opens user access (SMAP) - confirm */
  34         cmpl $8,%edx
  35         jb 20f          /* less than 8 bytes, go to byte copy loop */
  36         ALIGN_DESTINATION       /* macro not visible here; presumably byte-copies until %rdi is 8-byte aligned - confirm */
  37         movl %edx,%ecx
  38         andl $63,%edx           /* edx = bytes left over after the 64-byte blocks */
  39         shrl $6,%ecx            /* ecx = number of 64-byte blocks */
  40         jz 17f                  /* no full block: go straight to the 8-byte loop */
  41 1:      movq (%rsi),%r8         /* load qwords 0-3 of the block */
  42 2:      movq 1*8(%rsi),%r9
  43 3:      movq 2*8(%rsi),%r10
  44 4:      movq 3*8(%rsi),%r11
  45 5:      movq %r8,(%rdi)         /* store qwords 0-3 */
  46 6:      movq %r9,1*8(%rdi)
  47 7:      movq %r10,2*8(%rdi)
  48 8:      movq %r11,3*8(%rdi)
  49 9:      movq 4*8(%rsi),%r8      /* load qwords 4-7 of the block */
  50 10:     movq 5*8(%rsi),%r9
  51 11:     movq 6*8(%rsi),%r10
  52 12:     movq 7*8(%rsi),%r11
  53 13:     movq %r8,4*8(%rdi)      /* store qwords 4-7 */
  54 14:     movq %r9,5*8(%rdi)
  55 15:     movq %r10,6*8(%rdi)
  56 16:     movq %r11,7*8(%rdi)
  57         leaq 64(%rsi),%rsi      /* pointers advance only after the whole block is done */
  58         leaq 64(%rdi),%rdi
  59         decl %ecx
  60         jnz 1b
  61 17:     movl %edx,%ecx
  62         andl $7,%edx            /* edx = trailing bytes (0-7) */
  63         shrl $3,%ecx            /* ecx = whole qwords left */
  64         jz 20f
  65 18:     movq (%rsi),%r8
  66 19:     movq %r8,(%rdi)
  67         leaq 8(%rsi),%rsi
  68         leaq 8(%rdi),%rdi
  69         decl %ecx
  70         jnz 18b
  71 20:     andl %edx,%edx          /* sets ZF when no bytes remain */
  72         jz 23f
  73         movl %edx,%ecx          /* ecx = byte loop counter */
  74 21:     movb (%rsi),%al
  75 22:     movb %al,(%rdi)
  76         incq %rsi
  77         incq %rdi
  78         decl %ecx
  79         jnz 21b
  80 23:     xor %eax,%eax           /* success: 0 uncopied bytes */
  81         ASM_CLAC                /* presumably the counterpart of ASM_STAC above - confirm */
  82         ret
  83
  84         .section .fixup,"ax"
  85 30:     shll $6,%ecx            /* fault in 64-byte loop: unfinished blocks (incl. current) in bytes */
  86         addl %ecx,%edx          /* edx = blocks*64 + remainder = bytes from current %rsi to the end */
  87         jmp 60f
  88 40:     leal (%rdx,%rcx,8),%edx /* fault in 8-byte loop: edx = trailing bytes + 8 * qwords left */
  89         jmp 60f
  90 50:     movl %ecx,%edx          /* fault in byte loop: edx = bytes left */
  91 60:     jmp copy_user_handle_tail /* ecx is zerorest also; handler is defined outside this file */
  92         .previous
  93
  94         _ASM_EXTABLE(1b,30b)    /* pairs of (instruction label, fixup label); macro defined outside this file */
  95         _ASM_EXTABLE(2b,30b)
  96         _ASM_EXTABLE(3b,30b)
  97         _ASM_EXTABLE(4b,30b)
  98         _ASM_EXTABLE(5b,30b)
  99         _ASM_EXTABLE(6b,30b)
 100         _ASM_EXTABLE(7b,30b)
 101         _ASM_EXTABLE(8b,30b)
 102         _ASM_EXTABLE(9b,30b)
 103         _ASM_EXTABLE(10b,30b)
 104         _ASM_EXTABLE(11b,30b)
 105         _ASM_EXTABLE(12b,30b)
 106         _ASM_EXTABLE(13b,30b)
 107         _ASM_EXTABLE(14b,30b)
 108         _ASM_EXTABLE(15b,30b)
 109         _ASM_EXTABLE(16b,30b)
 110         _ASM_EXTABLE(18b,40b)   /* 8-byte loop */
 111         _ASM_EXTABLE(19b,40b)
 112         _ASM_EXTABLE(21b,50b)   /* byte loop */
 113         _ASM_EXTABLE(22b,50b)
 114 ENDPROC(copy_user_generic_unrolled)
 115 EXPORT_SYMBOL(copy_user_generic_unrolled)
 116
 117 /* Some CPUs run faster using the string copy instructions.
 118  * This is also a lot simpler. Use them when possible.
 119  *
 120  * Only 4GB of copy is supported. This shouldn't be a problem
 121  * because the kernel normally only writes from/to page sized chunks
 122  * even if user space passed a longer buffer.
 123  * And more would be dangerous because both Intel and AMD have
 124  * errata with rep movsq > 4GB. If someone feels the need to fix
 125  * this please consider this.
 126  *
 127  * Input:
 128  * rdi destination
 129  * rsi source
 130  * rdx count
 131  *
 132  * Output:
 133  * eax uncopied bytes or 0 if successful.
 134  */
 135 ENTRY(copy_user_generic_string)
 136         ASM_STAC                /* macro defined outside this file; presumably opens user access (SMAP) - confirm */
 137         cmpl $8,%edx
 138         jb 2f           /* less than 8 bytes, go to byte copy loop */
 139         ALIGN_DESTINATION       /* macro not visible here; presumably byte-copies until %rdi is 8-byte aligned - confirm */
 140         movl %edx,%ecx
 141         shrl $3,%ecx            /* ecx = number of whole qwords */
 142         andl $7,%edx            /* edx = trailing bytes (0-7) */
 143 1:      rep
 144         movsq                   /* copy ecx qwords from (%rsi) to (%rdi) */
 145 2:      movl %edx,%ecx          /* ecx = trailing byte count */
 146 3:      rep
 147         movsb                   /* copy the trailing bytes */
 148         xorl %eax,%eax          /* success: 0 uncopied bytes */
 149         ASM_CLAC                /* presumably the counterpart of ASM_STAC above - confirm */
 150         ret
 151
 152         .section .fixup,"ax"
 153 11:     leal (%rdx,%rcx,8),%ecx /* fault in rep movsq: ecx = trailing bytes + 8 * qwords left */
 154 12:     movl %ecx,%edx          /* ecx is zerorest also */
 155         jmp copy_user_handle_tail /* edx = bytes left; handler is defined outside this file */
 156         .previous
 157
 158         _ASM_EXTABLE(1b,11b)    /* rep movsq -> fixup 11 */
 159         _ASM_EXTABLE(3b,12b)    /* rep movsb -> fixup 12 */
 160 ENDPROC(copy_user_generic_string)
 161 EXPORT_SYMBOL(copy_user_generic_string)
 162
 163 /*
 164  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 165  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 166  *
 167  * Input:
 168  * rdi destination
 169  * rsi source
 170  * rdx count
 171  *
 172  * Output:
 173  * eax uncopied bytes or 0 if successful.
 174  */
 175 ENTRY(copy_user_enhanced_fast_string)
 176         ASM_STAC                /* macro defined outside this file; presumably opens user access (SMAP) - confirm */
 177         movl %edx,%ecx          /* ecx = byte count for rep movsb */
 178 1:      rep
 179         movsb                   /* copy all bytes; no alignment step or size split here */
 180         xorl %eax,%eax          /* success: 0 uncopied bytes */
 181         ASM_CLAC                /* presumably the counterpart of ASM_STAC above - confirm */
 182         ret
 183
 184         .section .fixup,"ax"
 185 12:     movl %ecx,%edx          /* ecx is zerorest also */
 186         jmp copy_user_handle_tail /* edx = bytes left; handler is defined outside this file */
 187         .previous
 188
 189         _ASM_EXTABLE(1b,12b)    /* rep movsb -> fixup 12 */
 190 ENDPROC(copy_user_enhanced_fast_string)
 191 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 192
 193 /*
 194  * __copy_user_nocache - Uncached memory copy with exception handling
 195  * This will force destination out of cache for more performance.
 196  *
 197  * Note: Cached memory copy is used when destination or size is not
 198  * naturally aligned. That is:
 199  *  - Require 8-byte alignment when size is 8 bytes or larger.
 200  *  - Require 4-byte alignment when size is 4 bytes.
 201  */
 202 ENTRY(__copy_user_nocache)
 203         ASM_STAC                /* macro defined outside this file; presumably opens user access (SMAP) - confirm */
 204
 205         /* If size is less than 8 bytes, go to 4-byte copy */
 206         cmpl $8,%edx
 207         jb .L_4b_nocache_copy_entry
 208
 209         /* If destination is not 8-byte aligned, "cache" copy to align it */
 210         ALIGN_DESTINATION
 211
 212         /* Set 64-byte block (2 x 4x8-byte) copy count and remainder */
 213         movl %edx,%ecx
 214         andl $63,%edx           /* edx = bytes left over after the 64-byte blocks */
 215         shrl $6,%ecx            /* ecx = number of 64-byte blocks */
 216         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 217
 218         /* Perform 4x8-byte nocache loop-copy, twice per iteration (64 bytes) */
 219 .L_4x8b_nocache_copy_loop:
 220 1:      movq (%rsi),%r8
 221 2:      movq 1*8(%rsi),%r9
 222 3:      movq 2*8(%rsi),%r10
 223 4:      movq 3*8(%rsi),%r11
 224 5:      movnti %r8,(%rdi)       /* movnti = non-temporal store */
 225 6:      movnti %r9,1*8(%rdi)
 226 7:      movnti %r10,2*8(%rdi)
 227 8:      movnti %r11,3*8(%rdi)
 228 9:      movq 4*8(%rsi),%r8
 229 10:     movq 5*8(%rsi),%r9
 230 11:     movq 6*8(%rsi),%r10
 231 12:     movq 7*8(%rsi),%r11
 232 13:     movnti %r8,4*8(%rdi)
 233 14:     movnti %r9,5*8(%rdi)
 234 15:     movnti %r10,6*8(%rdi)
 235 16:     movnti %r11,7*8(%rdi)
 236         leaq 64(%rsi),%rsi      /* pointers advance only after the whole block is done */
 237         leaq 64(%rdi),%rdi
 238         decl %ecx
 239         jnz .L_4x8b_nocache_copy_loop
 240
 241         /* Set 8-byte copy count and remainder */
 242 .L_8b_nocache_copy_entry:
 243         movl %edx,%ecx
 244         andl $7,%edx            /* edx = trailing bytes (0-7) */
 245         shrl $3,%ecx            /* ecx = whole qwords left */
 246         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 247
 248         /* Perform 8-byte nocache loop-copy */
 249 .L_8b_nocache_copy_loop:
 250 20:     movq (%rsi),%r8
 251 21:     movnti %r8,(%rdi)
 252         leaq 8(%rsi),%rsi
 253         leaq 8(%rdi),%rdi
 254         decl %ecx
 255         jnz .L_8b_nocache_copy_loop
 256
 257         /* If no byte left, we're done */
 258 .L_4b_nocache_copy_entry:
 259         andl %edx,%edx          /* sets ZF when no bytes remain; edx is below 8 on every path here */
 260         jz .L_finish_copy
 261
 262         /* If destination is not 4-byte aligned, go to byte copy: */
 263         movl %edi,%ecx
 264         andl $3,%ecx            /* ecx is only a scratch test value; the byte-copy entry reloads it */
 265         jnz .L_1b_cache_copy_entry
 266
 267         /* Set 4-byte copy count (1 or 0) and remainder */
 268         movl %edx,%ecx
 269         andl $3,%edx            /* edx = trailing bytes (0-3) */
 270         shrl $2,%ecx            /* ecx = 1 if a 4-byte word is left, else 0 */
 271         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 272
 273         /* Perform 4-byte nocache copy: */
 274 30:     movl (%rsi),%r8d
 275 31:     movnti %r8d,(%rdi)
 276         leaq 4(%rsi),%rsi
 277         leaq 4(%rdi),%rdi
 278
 279         /* If no bytes left, we're done: */
 280         andl %edx,%edx
 281         jz .L_finish_copy
 282
 283         /* Perform byte "cache" loop-copy for the remainder */
 284 .L_1b_cache_copy_entry:
 285         movl %edx,%ecx          /* ecx = byte loop counter */
 286 .L_1b_cache_copy_loop:
 287 40:     movb (%rsi),%al
 288 41:     movb %al,(%rdi)
 289         incq %rsi
 290         incq %rdi
 291         decl %ecx
 292         jnz .L_1b_cache_copy_loop
 293
 294         /* Finished copying; fence the prior stores */
 295 .L_finish_copy:
 296         xorl %eax,%eax          /* success: 0 uncopied bytes */
 297         ASM_CLAC                /* presumably the counterpart of ASM_STAC above - confirm */
 298         sfence
 299         ret
 300
 301         .section .fixup,"ax"
 302 .L_fixup_4x8b_copy:
 303         shll $6,%ecx            /* unfinished 64-byte blocks (incl. current) in bytes */
 304         addl %ecx,%edx          /* edx = blocks*64 + remainder = bytes from current %rsi to the end */
 305         jmp .L_fixup_handle_tail
 306 .L_fixup_8b_copy:
 307         lea (%rdx,%rcx,8),%rdx  /* rdx = trailing bytes + 8 * qwords left */
 308         jmp .L_fixup_handle_tail
 309 .L_fixup_4b_copy:
 310         lea (%rdx,%rcx,4),%rdx  /* rdx = trailing bytes + 4 * words left (ecx is 1 here) */
 311         jmp .L_fixup_handle_tail
 312 .L_fixup_1b_copy:
 313         movl %ecx,%edx          /* edx = bytes left in the byte loop */
 314 .L_fixup_handle_tail:
 315         sfence                  /* fence the non-temporal stores already issued before handing off */
 316         jmp copy_user_handle_tail /* edx = bytes left; handler is defined outside this file */
 317         .previous
 318
 319         _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)     /* pairs of (instruction label, fixup label); macro defined outside this file */
 320         _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
 321         _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
 322         _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
 323         _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
 324         _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
 325         _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
 326         _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
 327         _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
 328         _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
 329         _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
 330         _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
 331         _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
 332         _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
 333         _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
 334         _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
 335         _ASM_EXTABLE(20b,.L_fixup_8b_copy)      /* 8-byte loop */
 336         _ASM_EXTABLE(21b,.L_fixup_8b_copy)
 337         _ASM_EXTABLE(30b,.L_fixup_4b_copy)      /* single 4-byte copy */
 338         _ASM_EXTABLE(31b,.L_fixup_4b_copy)
 339         _ASM_EXTABLE(40b,.L_fixup_1b_copy)      /* byte loop */
 340         _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 341 ENDPROC(__copy_user_nocache)
 342 EXPORT_SYMBOL(__copy_user_nocache)