Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[linux-2.6-block.git] / drivers / hv / ring_buffer.c
CommitLineData
3e7ee490
HJ
1/*
2 *
3 * Copyright (c) 2009, Microsoft Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 * Authors:
19 * Haiyang Zhang <haiyangz@microsoft.com>
20 * Hank Janssen <hjanssen@microsoft.com>
b2a5a585 21 * K. Y. Srinivasan <kys@microsoft.com>
3e7ee490
HJ
22 *
23 */
0a46618d 24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3e7ee490 25
a0086dc5
GKH
26#include <linux/kernel.h>
27#include <linux/mm.h>
46a97191 28#include <linux/hyperv.h>
011a7c3c 29#include <linux/uio.h>
3f335ea2 30
0f2a6619 31#include "hyperv_vmbus.h"
3e7ee490 32
6fdf3b21
S
33void hv_begin_read(struct hv_ring_buffer_info *rbi)
34{
35 rbi->ring_buffer->interrupt_mask = 1;
dcd0eeca 36 virt_mb();
6fdf3b21
S
37}
38
39u32 hv_end_read(struct hv_ring_buffer_info *rbi)
40{
6fdf3b21
S
41
42 rbi->ring_buffer->interrupt_mask = 0;
dcd0eeca 43 virt_mb();
6fdf3b21
S
44
45 /*
46 * Now check to see if the ring buffer is still empty.
47 * If it is not, we raced and we need to process new
48 * incoming messages.
49 */
a6341f00 50 return hv_get_bytes_to_read(rbi);
6fdf3b21
S
51}
52
98fa8cf4
S
53/*
54 * When we write to the ring buffer, check if the host needs to
55 * be signaled. Here is the details of this protocol:
56 *
57 * 1. The host guarantees that while it is draining the
58 * ring buffer, it will set the interrupt_mask to
59 * indicate it does not need to be interrupted when
60 * new data is placed.
61 *
62 * 2. The host guarantees that it will completely drain
63 * the ring buffer before exiting the read loop. Further,
64 * once the ring buffer is empty, it will clear the
65 * interrupt_mask and re-check to see if new data has
66 * arrived.
67 */
68
69static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
70{
dcd0eeca 71 virt_mb();
d45faaee 72 if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
98fa8cf4
S
73 return false;
74
e91e84fa 75 /* check interrupt_mask before read_index */
dcd0eeca 76 virt_rmb();
98fa8cf4
S
77 /*
78 * This is the only case we need to signal when the
79 * ring transitions from being empty to non-empty.
80 */
d45faaee 81 if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
98fa8cf4
S
82 return true;
83
84 return false;
85}
86
822f18d4 87/* Get the next write location for the specified ring buffer. */
4d643114 88static inline u32
2b8a912e 89hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
3e7ee490 90{
fc8c72eb 91 u32 next = ring_info->ring_buffer->write_index;
3e7ee490 92
3e7ee490
HJ
93 return next;
94}
95
822f18d4 96/* Set the next write location for the specified ring buffer. */
3e7ee490 97static inline void
2b8a912e 98hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
fc8c72eb 99 u32 next_write_location)
3e7ee490 100{
fc8c72eb 101 ring_info->ring_buffer->write_index = next_write_location;
3e7ee490
HJ
102}
103
822f18d4 104/* Get the next read location for the specified ring buffer. */
4d643114 105static inline u32
2b8a912e 106hv_get_next_read_location(struct hv_ring_buffer_info *ring_info)
3e7ee490 107{
fc8c72eb 108 u32 next = ring_info->ring_buffer->read_index;
3e7ee490 109
3e7ee490
HJ
110 return next;
111}
112
b2a5a585 113/*
b2a5a585 114 * Get the next read location + offset for the specified ring buffer.
822f18d4 115 * This allows the caller to skip.
b2a5a585 116 */
4d643114 117static inline u32
2b8a912e 118hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info,
1ac58644 119 u32 offset)
3e7ee490 120{
fc8c72eb 121 u32 next = ring_info->ring_buffer->read_index;
3e7ee490 122
fc8c72eb
HZ
123 next += offset;
124 next %= ring_info->ring_datasize;
3e7ee490
HJ
125
126 return next;
127}
128
822f18d4 129/* Set the next read location for the specified ring buffer. */
3e7ee490 130static inline void
2b8a912e 131hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
fc8c72eb 132 u32 next_read_location)
3e7ee490 133{
fc8c72eb 134 ring_info->ring_buffer->read_index = next_read_location;
ab028db4 135 ring_info->priv_read_index = next_read_location;
3e7ee490
HJ
136}
137
822f18d4 138/* Get the size of the ring buffer. */
4d643114 139static inline u32
2b8a912e 140hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
3e7ee490 141{
fc8c72eb 142 return ring_info->ring_datasize;
3e7ee490
HJ
143}
144
822f18d4 145/* Get the read and write indices as u64 of the specified ring buffer. */
59471438 146static inline u64
2b8a912e 147hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
3e7ee490 148{
fc8c72eb 149 return (u64)ring_info->ring_buffer->write_index << 32;
3e7ee490
HJ
150}
151
8f1136ae 152/*
8f1136ae
S
153 * Helper routine to copy to source from ring buffer.
154 * Assume there is enough room. Handles wrap-around in src case only!!
8f1136ae
S
155 */
156static u32 hv_copyfrom_ringbuffer(
157 struct hv_ring_buffer_info *ring_info,
158 void *dest,
159 u32 destlen,
160 u32 start_read_offset)
161{
162 void *ring_buffer = hv_get_ring_buffer(ring_info);
163 u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
164
165 u32 frag_len;
166
167 /* wrap-around detected at the src */
168 if (destlen > ring_buffer_size - start_read_offset) {
169 frag_len = ring_buffer_size - start_read_offset;
170
171 memcpy(dest, ring_buffer + start_read_offset, frag_len);
172 memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
173 } else
174
175 memcpy(dest, ring_buffer + start_read_offset, destlen);
176
177
178 start_read_offset += destlen;
179 start_read_offset %= ring_buffer_size;
180
181 return start_read_offset;
182}
183
184
7581578d 185/*
7581578d
S
186 * Helper routine to copy from source to ring buffer.
187 * Assume there is enough room. Handles wrap-around in dest case only!!
7581578d
S
188 */
189static u32 hv_copyto_ringbuffer(
fc8c72eb
HZ
190 struct hv_ring_buffer_info *ring_info,
191 u32 start_write_offset,
192 void *src,
7581578d
S
193 u32 srclen)
194{
195 void *ring_buffer = hv_get_ring_buffer(ring_info);
196 u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
197 u32 frag_len;
198
199 /* wrap-around detected! */
200 if (srclen > ring_buffer_size - start_write_offset) {
201 frag_len = ring_buffer_size - start_write_offset;
202 memcpy(ring_buffer + start_write_offset, src, frag_len);
203 memcpy(ring_buffer, src + frag_len, srclen - frag_len);
204 } else
205 memcpy(ring_buffer + start_write_offset, src, srclen);
3e7ee490 206
7581578d
S
207 start_write_offset += srclen;
208 start_write_offset %= ring_buffer_size;
209
210 return start_write_offset;
211}
3e7ee490 212
822f18d4 213/* Get various debug metrics for the specified ring buffer. */
a75b61d5 214void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
80682b7a 215 struct hv_ring_buffer_debug_info *debug_info)
3e7ee490 216{
fc8c72eb
HZ
217 u32 bytes_avail_towrite;
218 u32 bytes_avail_toread;
3e7ee490 219
fc8c72eb 220 if (ring_info->ring_buffer) {
2b8a912e 221 hv_get_ringbuffer_availbytes(ring_info,
fc8c72eb
HZ
222 &bytes_avail_toread,
223 &bytes_avail_towrite);
3e7ee490 224
fc8c72eb
HZ
225 debug_info->bytes_avail_toread = bytes_avail_toread;
226 debug_info->bytes_avail_towrite = bytes_avail_towrite;
82f8bd40 227 debug_info->current_read_index =
fc8c72eb 228 ring_info->ring_buffer->read_index;
82f8bd40 229 debug_info->current_write_index =
fc8c72eb 230 ring_info->ring_buffer->write_index;
82f8bd40 231 debug_info->current_interrupt_mask =
fc8c72eb 232 ring_info->ring_buffer->interrupt_mask;
3e7ee490
HJ
233 }
234}
235
822f18d4 236/* Initialize the ring buffer. */
72a95cbc 237int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
fc8c72eb 238 void *buffer, u32 buflen)
3e7ee490 239{
4a1b3acc 240 if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
3324fb40 241 return -EINVAL;
3e7ee490 242
fc8c72eb 243 memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
3e7ee490 244
fc8c72eb
HZ
245 ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
246 ring_info->ring_buffer->read_index =
247 ring_info->ring_buffer->write_index = 0;
3e7ee490 248
822f18d4 249 /* Set the feature bit for enabling flow control. */
046c7911
S
250 ring_info->ring_buffer->feature_bits.value = 1;
251
fc8c72eb
HZ
252 ring_info->ring_size = buflen;
253 ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
3e7ee490 254
fc8c72eb 255 spin_lock_init(&ring_info->ring_lock);
3e7ee490
HJ
256
257 return 0;
258}
259
/* Clean up the ring buffer. Currently a no-op. */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
	/* Nothing to do. */
}
264
822f18d4 265/* Write to the ring buffer. */
633c4dce 266int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
fe760e4d 267 struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
3e7ee490 268{
4408f531 269 int i = 0;
fc8c72eb 270 u32 bytes_avail_towrite;
fc8c72eb 271 u32 totalbytes_towrite = 0;
3e7ee490 272
66a60543 273 u32 next_write_location;
98fa8cf4 274 u32 old_write;
fc8c72eb 275 u64 prev_indices = 0;
fe760e4d 276 unsigned long flags = 0;
3e7ee490 277
011a7c3c
S
278 for (i = 0; i < kv_count; i++)
279 totalbytes_towrite += kv_list[i].iov_len;
3e7ee490 280
fc8c72eb 281 totalbytes_towrite += sizeof(u64);
3e7ee490 282
fe760e4d
S
283 if (lock)
284 spin_lock_irqsave(&outring_info->ring_lock, flags);
3e7ee490 285
a6341f00 286 bytes_avail_towrite = hv_get_bytes_to_write(outring_info);
3e7ee490 287
822f18d4
VK
288 /*
289 * If there is only room for the packet, assume it is full.
290 * Otherwise, the next time around, we think the ring buffer
291 * is empty since the read index == write index.
292 */
fc8c72eb 293 if (bytes_avail_towrite <= totalbytes_towrite) {
fe760e4d
S
294 if (lock)
295 spin_unlock_irqrestore(&outring_info->ring_lock, flags);
d2598f01 296 return -EAGAIN;
3e7ee490
HJ
297 }
298
454f18a9 299 /* Write to the ring buffer */
2b8a912e 300 next_write_location = hv_get_next_write_location(outring_info);
3e7ee490 301
98fa8cf4
S
302 old_write = next_write_location;
303
011a7c3c 304 for (i = 0; i < kv_count; i++) {
2b8a912e 305 next_write_location = hv_copyto_ringbuffer(outring_info,
fc8c72eb 306 next_write_location,
011a7c3c
S
307 kv_list[i].iov_base,
308 kv_list[i].iov_len);
3e7ee490
HJ
309 }
310
454f18a9 311 /* Set previous packet start */
2b8a912e 312 prev_indices = hv_get_ring_bufferindices(outring_info);
3e7ee490 313
2b8a912e 314 next_write_location = hv_copyto_ringbuffer(outring_info,
fc8c72eb
HZ
315 next_write_location,
316 &prev_indices,
b219b3f7 317 sizeof(u64));
3e7ee490 318
98fa8cf4 319 /* Issue a full memory barrier before updating the write index */
dcd0eeca 320 virt_mb();
3e7ee490 321
454f18a9 322 /* Now, update the write location */
2b8a912e 323 hv_set_next_write_location(outring_info, next_write_location);
3e7ee490 324
3e7ee490 325
fe760e4d
S
326 if (lock)
327 spin_unlock_irqrestore(&outring_info->ring_lock, flags);
98fa8cf4
S
328
329 *signal = hv_need_to_signal(old_write, outring_info);
3e7ee490
HJ
330 return 0;
331}
332
940b68e2
VK
333int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
334 void *buffer, u32 buflen, u32 *buffer_actual_len,
335 u64 *requestid, bool *signal, bool raw)
3e7ee490 336{
fc8c72eb
HZ
337 u32 bytes_avail_toread;
338 u32 next_read_location = 0;
339 u64 prev_indices = 0;
940b68e2
VK
340 struct vmpacket_descriptor desc;
341 u32 offset;
342 u32 packetlen;
343 int ret = 0;
3e7ee490 344
fc8c72eb 345 if (buflen <= 0)
a16e1485 346 return -EINVAL;
3e7ee490 347
3e7ee490 348
940b68e2
VK
349 *buffer_actual_len = 0;
350 *requestid = 0;
351
a6341f00 352 bytes_avail_toread = hv_get_bytes_to_read(inring_info);
454f18a9 353 /* Make sure there is something to read */
940b68e2
VK
354 if (bytes_avail_toread < sizeof(desc)) {
355 /*
356 * No error is set when there is even no header, drivers are
357 * supposed to analyze buffer_actual_len.
358 */
3eba9a77 359 return ret;
940b68e2 360 }
3e7ee490 361
940b68e2
VK
362 next_read_location = hv_get_next_read_location(inring_info);
363 next_read_location = hv_copyfrom_ringbuffer(inring_info, &desc,
364 sizeof(desc),
365 next_read_location);
366
367 offset = raw ? 0 : (desc.offset8 << 3);
368 packetlen = (desc.len8 << 3) - offset;
369 *buffer_actual_len = packetlen;
370 *requestid = desc.trans_id;
371
3eba9a77
S
372 if (bytes_avail_toread < packetlen + offset)
373 return -EAGAIN;
940b68e2 374
3eba9a77
S
375 if (packetlen > buflen)
376 return -ENOBUFS;
3e7ee490 377
1ac58644 378 next_read_location =
2b8a912e 379 hv_get_next_readlocation_withoffset(inring_info, offset);
3e7ee490 380
2b8a912e 381 next_read_location = hv_copyfrom_ringbuffer(inring_info,
fc8c72eb 382 buffer,
940b68e2 383 packetlen,
fc8c72eb 384 next_read_location);
3e7ee490 385
2b8a912e 386 next_read_location = hv_copyfrom_ringbuffer(inring_info,
fc8c72eb 387 &prev_indices,
4408f531 388 sizeof(u64),
fc8c72eb 389 next_read_location);
3e7ee490 390
822f18d4
VK
391 /*
392 * Make sure all reads are done before we update the read index since
393 * the writer may start writing to the read area once the read index
394 * is updated.
395 */
dcd0eeca 396 virt_mb();
3e7ee490 397
454f18a9 398 /* Update the read index */
2b8a912e 399 hv_set_next_read_location(inring_info, next_read_location);
3e7ee490 400
a389fcfd 401 *signal = hv_need_to_signal_on_read(inring_info);
c2b8e520 402
940b68e2 403 return ret;
b5f53dde 404}