Commit | Line | Data |
---|---|---|
74a04967 KA |
1 | Application Data Integrity (ADI) |
2 | ================================ | |
3 | ||
4 | SPARC M7 processor adds the Application Data Integrity (ADI) feature. | |
5 | ADI allows a task to set version tags on any subset of its address | |
6 | space. Once ADI is enabled and version tags are set for ranges of | |
7 | address space of a task, the processor will compare the tag in pointers | |
8 | to memory in these ranges to the version set by the application | |
9 | previously. Access to memory is granted only if the tag in given pointer | |
10 | matches the tag set by the application. In case of mismatch, processor | |
11 | raises an exception. | |
12 | ||
13 | Following steps must be taken by a task to enable ADI fully: | |
14 | ||
15 | 1. Set the user mode PSTATE.mcde bit. This acts as master switch for | |
16 | the task's entire address space to enable/disable ADI for the task. | |
17 | ||
18 | 2. Set TTE.mcd bit on any TLB entries that correspond to the range of | |
19 | addresses ADI is being enabled on. MMU checks the version tag only | |
20 | on the pages that have TTE.mcd bit set. | |
21 | ||
22 | 3. Set the version tag for virtual addresses using stxa instruction | |
23 | and one of the MCD specific ASIs. Each stxa instruction sets the | |
24 | given tag for one ADI block size number of bytes. This step must | |
25 | be repeated for entire page to set tags for entire page. | |
26 | ||
27 | ADI block size for the platform is provided by the hypervisor to kernel | |
28 | in machine description tables. Hypervisor also provides the number of | |
29 | top bits in the virtual address that specify the version tag. Once | |
30 | version tag has been set for a memory location, the tag is stored in the | |
31 | physical memory and the same tag must be present in the ADI version tag | |
32 | bits of the virtual address being presented to the MMU. For example on | |
33 | SPARC M7 processor, MMU uses bits 63-60 for version tags and ADI block | |
34 | size is same as cacheline size which is 64 bytes. A task that sets ADI | |
35 | version to, say 10, on a range of memory, must access that memory using | |
36 | virtual addresses that contain 0xa in bits 63-60. | |
37 | ||
38 | ADI is enabled on a set of pages using mprotect() with PROT_ADI flag. | |
39 | When ADI is enabled on a set of pages by a task for the first time, | |
40 | kernel sets the PSTATE.mcde bit for the task. Version tags for memory | |
41 | addresses are set with an stxa instruction on the addresses using | |
42 | ASI_MCD_PRIMARY or ASI_MCD_ST_BLKINIT_PRIMARY. ADI block size is | |
43 | provided by the hypervisor to the kernel. Kernel returns the value of | |
44 | ADI block size to userspace using auxiliary vector along with other ADI | |
45 | info. Following auxiliary vectors are provided by the kernel: | |
46 | ||
47 | AT_ADI_BLKSZ ADI block size. This is the granularity and | |
48 | alignment, in bytes, of ADI versioning. | |
49 | AT_ADI_NBITS Number of ADI version bits in the VA | |
50 | ||
51 | ||
52 | IMPORTANT NOTES: | |
53 | ||
54 | - Version tag values of 0x0 and 0xf are reserved. These values match any | |
55 | tag in virtual address and never generate a mismatch exception. | |
56 | ||
57 | - Version tags are set on virtual addresses from userspace even though | |
58 | tags are stored in physical memory. Tags are set on a physical page | |
59 | after it has been allocated to a task and a pte has been created for | |
60 | it. | |
61 | ||
62 | - When a task frees a memory page it had set version tags on, the page | |
63 | goes back to free page pool. When this page is re-allocated to a task, | |
64 | kernel clears the page using block initialization ASI which clears the | |
65 | version tags as well for the page. If a page allocated to a task is | |
66 | freed and allocated back to the same task, old version tags set by the | |
67 | task on that page will no longer be present. | |
68 | ||
69 | - ADI tag mismatches are not detected for non-faulting loads. | |
70 | ||
71 | - Kernel does not set any tags for user pages and it is entirely a | |
72 | task's responsibility to set any version tags. Kernel does ensure the | |
73 | version tags are preserved if a page is swapped out to the disk and | |
74 | swapped back in. It also preserves the version tags if a page is | |
75 | migrated. | |
76 | ||
77 | - ADI works for any size pages. A userspace task need not be aware of | |
78 | page size when using ADI. It can simply select a virtual address | |
79 | range, enable ADI on the range using mprotect() and set version tags | |
80 | for the entire range. mprotect() ensures range is aligned to page size | |
81 | and is a multiple of page size. | |
82 | ||
83 | - ADI tags can only be set on writable memory. For example, ADI tags can | |
84 | not be set on read-only mappings. | |
85 | ||
86 | ||
87 | ||
88 | ADI related traps | |
89 | ----------------- | |
90 | ||
91 | With ADI enabled, following new traps may occur: | |
92 | ||
93 | Disrupting memory corruption | |
94 | ||
95 | When a store accesses a memory location that has TTE.mcd=1, | |
96 | the task is running with ADI enabled (PSTATE.mcde=1), and the ADI | |
97 | tag in the address used (bits 63:60) does not match the tag set on | |
98 | the corresponding cacheline, a memory corruption trap occurs. By | |
99 | default, it is a disrupting trap and is sent to the hypervisor | |
100 | first. Hypervisor creates a sun4v error report and sends a | |
101 | resumable error (TT=0x7e) trap to the kernel. The kernel sends | |
102 | a SIGSEGV to the task that resulted in this trap with the following | |
103 | info: | |
104 | ||
105 | siginfo.si_signo = SIGSEGV; | |
106 | siginfo.si_errno = 0; | |
107 | siginfo.si_code = SEGV_ADIDERR; | |
108 | siginfo.si_addr = addr; /* PC where first mismatch occurred */ | |
109 | siginfo.si_trapno = 0; | |
110 | ||
111 | ||
112 | Precise memory corruption | |
113 | ||
114 | When a store accesses a memory location that has TTE.mcd=1, | |
115 | the task is running with ADI enabled (PSTATE.mcde=1), and the ADI | |
116 | tag in the address used (bits 63:60) does not match the tag set on | |
117 | the corresponding cacheline, a memory corruption trap occurs. If | |
118 | MCD precise exception is enabled (MCDPERR=1), a precise | |
119 | exception is sent to the kernel with TT=0x1a. The kernel sends | |
120 | a SIGSEGV to the task that resulted in this trap with the following | |
121 | info: | |
122 | ||
123 | siginfo.si_signo = SIGSEGV; | |
124 | siginfo.si_errno = 0; | |
125 | siginfo.si_code = SEGV_ADIPERR; | |
126 | siginfo.si_addr = addr; /* address that caused trap */ | |
127 | siginfo.si_trapno = 0; | |
128 | ||
129 | NOTE: ADI tag mismatch on a load always results in precise trap. | |
130 | ||
131 | ||
132 | MCD disabled | |
133 | ||
134 | When a task has not enabled ADI and attempts to set ADI version | |
135 | on a memory address, processor sends an MCD disabled trap. This | |
136 | trap is handled by hypervisor first and the hypervisor vectors this | |
137 | trap through to the kernel as Data Access Exception trap with | |
138 | fault type set to 0xa (invalid ASI). When this occurs, the kernel | |
139 | sends the task SIGSEGV signal with following info: | |
140 | ||
141 | siginfo.si_signo = SIGSEGV; | |
142 | siginfo.si_errno = 0; | |
143 | siginfo.si_code = SEGV_ACCADI; | |
144 | siginfo.si_addr = addr; /* address that caused trap */ | |
145 | siginfo.si_trapno = 0; | |
146 | ||
147 | ||
148 | Sample program to use ADI | |
149 | ------------------------- | |
150 | ||
151 | Following sample program is meant to illustrate how to use the ADI | |
152 | functionality. | |
153 | ||
154 | #include <unistd.h> | |
155 | #include <stdio.h> | |
156 | #include <stdlib.h> | |
157 | #include <elf.h> | |
158 | #include <sys/ipc.h> | |
159 | #include <sys/shm.h> | |
160 | #include <sys/mman.h> | |
161 | #include <asm/asi.h> | |
162 | ||
163 | #ifndef AT_ADI_BLKSZ | |
164 | #define AT_ADI_BLKSZ 48 | |
165 | #endif | |
166 | #ifndef AT_ADI_NBITS | |
167 | #define AT_ADI_NBITS 49 | |
168 | #endif | |
169 | ||
170 | #ifndef PROT_ADI | |
171 | #define PROT_ADI 0x10 | |
172 | #endif | |
173 | ||
174 | #define BUFFER_SIZE 32*1024*1024UL | |
175 | ||
176 | main(int argc, char* argv[], char* envp[]) | |
177 | { | |
178 | unsigned long i, mcde, adi_blksz, adi_nbits; | |
179 | char *shmaddr, *tmp_addr, *end, *veraddr, *clraddr; | |
180 | int shmid, version; | |
181 | Elf64_auxv_t *auxv; | |
182 | ||
183 | adi_blksz = 0; | |
184 | ||
185 | while(*envp++ != NULL); | |
186 | for (auxv = (Elf64_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) { | |
187 | switch (auxv->a_type) { | |
188 | case AT_ADI_BLKSZ: | |
189 | adi_blksz = auxv->a_un.a_val; | |
190 | break; | |
191 | case AT_ADI_NBITS: | |
192 | adi_nbits = auxv->a_un.a_val; | |
193 | break; | |
194 | } | |
195 | } | |
196 | if (adi_blksz == 0) { | |
197 | fprintf(stderr, "Oops! ADI is not supported\n"); | |
198 | exit(1); | |
199 | } | |
200 | ||
201 | printf("ADI capabilities:\n"); | |
202 | printf("\tBlock size = %ld\n", adi_blksz); | |
203 | printf("\tNumber of bits = %ld\n", adi_nbits); | |
204 | ||
205 | if ((shmid = shmget(2, BUFFER_SIZE, | |
206 | IPC_CREAT | SHM_R | SHM_W)) < 0) { | |
207 | perror("shmget failed"); | |
208 | exit(1); | |
209 | } | |
210 | ||
211 | shmaddr = shmat(shmid, NULL, 0); | |
212 | if (shmaddr == (char *)-1) { | |
213 | perror("shm attach failed"); | |
214 | shmctl(shmid, IPC_RMID, NULL); | |
215 | exit(1); | |
216 | } | |
217 | ||
218 | if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE|PROT_ADI)) { | |
219 | perror("mprotect failed"); | |
220 | goto err_out; | |
221 | } | |
222 | ||
223 | /* Set the ADI version tag on the shm segment | |
224 | */ | |
225 | version = 10; | |
226 | tmp_addr = shmaddr; | |
227 | end = shmaddr + BUFFER_SIZE; | |
228 | while (tmp_addr < end) { | |
229 | asm volatile( | |
230 | "stxa %1, [%0]0x90\n\t" | |
231 | : | |
232 | : "r" (tmp_addr), "r" (version)); | |
233 | tmp_addr += adi_blksz; | |
234 | } | |
235 | asm volatile("membar #Sync\n\t"); | |
236 | ||
237 | /* Create a versioned address from the normal address by placing | |
238 | * version tag in the upper adi_nbits bits | |
239 | */ | |
240 | tmp_addr = (void *) ((unsigned long)shmaddr << adi_nbits); | |
241 | tmp_addr = (void *) ((unsigned long)tmp_addr >> adi_nbits); | |
242 | veraddr = (void *) (((unsigned long)version << (64-adi_nbits)) | |
243 | | (unsigned long)tmp_addr); | |
244 | ||
245 | printf("Starting the writes:\n"); | |
246 | for (i = 0; i < BUFFER_SIZE; i++) { | |
247 | veraddr[i] = (char)(i); | |
248 | if (!(i % (1024 * 1024))) | |
249 | printf("."); | |
250 | } | |
251 | printf("\n"); | |
252 | ||
253 | printf("Verifying data..."); | |
254 | fflush(stdout); | |
255 | for (i = 0; i < BUFFER_SIZE; i++) | |
256 | if (veraddr[i] != (char)i) | |
257 | printf("\nIndex %lu mismatched\n", i); | |
258 | printf("Done.\n"); | |
259 | ||
260 | /* Disable ADI and clean up | |
261 | */ | |
262 | if (mprotect(shmaddr, BUFFER_SIZE, PROT_READ|PROT_WRITE)) { | |
263 | perror("mprotect failed"); | |
264 | goto err_out; | |
265 | } | |
266 | ||
267 | if (shmdt((const void *)shmaddr) != 0) | |
268 | perror("Detach failure"); | |
269 | shmctl(shmid, IPC_RMID, NULL); | |
270 | ||
271 | exit(0); | |
272 | ||
273 | err_out: | |
274 | if (shmdt((const void *)shmaddr) != 0) | |
275 | perror("Detach failure"); | |
276 | shmctl(shmid, IPC_RMID, NULL); | |
277 | exit(1); | |
278 | } |