parameter.
1: The multi-queue block layer is instantiated with a hardware dispatch
queue for each CPU node in the system.
+
+use_lightnvm=[0/1]: Default: 0
+ Register device with LightNVM. Requires blk-mq to be used.
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
W: http://github/OpenChannelSSD
+L: linux-block@vger.kernel.org
S: Maintained
F: drivers/lightnvm/
F: include/linux/lightnvm.h
if (vcpu->arch.power_off || vcpu->arch.pause)
vcpu_sleep(vcpu);
- /*
- * Disarming the background timer must be done in a
- * preemptible context, as this call may sleep.
- */
- kvm_timer_flush_hwstate(vcpu);
-
/*
* Preparing the interrupts to be injected also
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
preempt_disable();
+ kvm_timer_flush_hwstate(vcpu);
kvm_vgic_flush_hwstate(vcpu);
local_irq_disable();
__kvm_flush_dcache_pud(pud);
}
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+ return !pfn_valid(pfn);
+}
+
/**
* stage2_dissolve_pmd() - clear and flush huge PMD entry
* @kvm: pointer to kvm structure.
kvm_tlb_flush_vmid_ipa(kvm, addr);
/* No need to invalidate the cache for device mappings */
- if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ if (!kvm_is_device_pfn(__phys_to_pfn(addr)))
kvm_flush_dcache_pte(old_pte);
put_page(virt_to_page(pte));
pte = pte_offset_kernel(pmd, addr);
do {
- if (!pte_none(*pte) &&
- (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+ if (!pte_none(*pte) && !kvm_is_device_pfn(__phys_to_pfn(addr)))
kvm_flush_dcache_pte(*pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
}
return kvm_vcpu_dabt_iswrite(vcpu);
}
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
- return !pfn_valid(pfn);
-}
-
/**
* stage2_wp_ptes - write protect PMD range
* @pmd: pointer to pmd entry
If unsure, say Y.
+config ARM64_ERRATUM_834220
+ bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault"
+ depends on KVM
+ default y
+ help
+ This option adds an alternative code sequence to work around ARM
+ erratum 834220 on Cortex-A57 parts up to r1p2.
+
+ Affected Cortex-A57 parts might report a Stage 2 translation
+ fault as the result of a Stage 1 fault for load crossing a
+ page boundary when there is a permission or device memory
+ alignment fault at Stage 1 and a translation fault at Stage 2.
+
+ The workaround is to verify that the Stage 1 translation
+ doesn't generate a fault before handling the Stage 2 fault.
+ Please note that this does not necessarily enable the workaround,
+ as it depends on the alternative framework, which will only patch
+ the kernel if an affected CPU is detected.
+
+ If unsure, say Y.
+
config ARM64_ERRATUM_845719
bool "Cortex-A53: 845719: a load might read incorrect data"
depends on COMPAT
#define ARM64_HAS_PAN 4
#define ARM64_HAS_LSE_ATOMICS 5
#define ARM64_WORKAROUND_CAVIUM_23154 6
+#define ARM64_WORKAROUND_834220 7
-#define ARM64_NCAPS 7
+#define ARM64_NCAPS 8
#ifndef __ASSEMBLY__
*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
}
+/*
+ * vcpu_reg should always be passed a register number coming from a
+ * read of ESR_EL2. Otherwise, it may give the wrong result on AArch32
+ * with banked registers.
+ */
static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
{
- if (vcpu_mode_is_32bit(vcpu))
- return vcpu_reg32(vcpu, reg_num);
-
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
}
(1 << MIDR_VARIANT_SHIFT) | 2),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_834220
+ {
+ /* Cortex-A57 r0p0 - r1p2 */
+ .desc = "ARM erratum 834220",
+ .capability = ARM64_WORKAROUND_834220,
+ MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 2),
+ },
+#endif
#ifdef CONFIG_ARM64_ERRATUM_845719
{
/* Cortex-A53 r0p[01234] */
ENDPROC(__kvm_flush_vm_context)
__kvm_hyp_panic:
+ // Stash PAR_EL1 before corrupting it in __restore_sysregs
+ mrs x0, par_el1
+ push x0, xzr
+
// Guess the context by looking at VTTBR:
// If zero, then we're already a host.
// Otherwise restore a minimal host context before panicing.
mrs x3, esr_el2
mrs x4, far_el2
mrs x5, hpfar_el2
- mrs x6, par_el1
+ pop x6, xzr // active context PAR_EL1
mrs x7, tpidr_el2
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
ENDPROC(__kvm_hyp_panic)
__hyp_panic_str:
- .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+ .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0"
.align 2
b.ne 1f // Not an abort we care about
/* This is an abort. Check for permission fault */
+alternative_if_not ARM64_WORKAROUND_834220
and x2, x1, #ESR_ELx_FSC_TYPE
cmp x2, #FSC_PERM
b.ne 1f // Not a permission fault
+alternative_else
+ nop // Use the permission fault path to
+ nop // check for a valid S1 translation,
+ nop // regardless of the ESR value.
+alternative_endif
/*
* Check for Stage-1 page table walk, which is guaranteed
/* Note: These now point to the banked copies */
*vcpu_spsr(vcpu) = new_spsr_value;
- *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+ *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
if (sctlr & (1 << 13))
base = (inst >> 21) & 0x1f;
op_inst = (inst >> 16) & 0x1f;
- offset = inst & 0xffff;
+ offset = (int16_t)inst;
cache = (inst >> 16) & 0x3;
op = (inst >> 18) & 0x7;
FEXPORT(__kvm_mips_load_asid)
/* Set the ASID for the Guest Kernel */
- INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */
- /* addresses shift to 0x80000000 */
- bltz t0, 1f /* If kernel */
+ PTR_L t0, VCPU_COP0(k1)
+ LONG_L t0, COP0_STATUS(t0)
+ andi t0, KSU_USER | ST0_ERL | ST0_EXL
+ xori t0, KSU_USER
+ bnez t0, 1f /* If kernel */
INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */
INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */
1:
mtc0 t0, CP0_EPC
/* Set the ASID for the Guest Kernel */
- INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */
- /* addresses shift to 0x80000000 */
- bltz t0, 1f /* If kernel */
+ PTR_L t0, VCPU_COP0(k1)
+ LONG_L t0, COP0_STATUS(t0)
+ andi t0, KSU_USER | ST0_ERL | ST0_EXL
+ xori t0, KSU_USER
+ bnez t0, 1f /* If kernel */
INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */
INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */
1:
if (!gebase) {
err = -ENOMEM;
- goto out_free_cpu;
+ goto out_uninit_cpu;
}
kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
ALIGN(size, PAGE_SIZE), gebase);
out_free_gebase:
kfree(gebase);
+out_uninit_cpu:
+ kvm_vcpu_uninit(vcpu);
+
out_free_cpu:
kfree(vcpu);
src_id, 0);
/* sending vcpu invalid */
- if (src_id >= KVM_MAX_VCPUS ||
- kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+ if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
if (sclp.has_sigpif)
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
irq->u.emerg.code, 0);
+ /* sending vcpu invalid */
+ if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+ return -EINVAL;
+
set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
r = 0;
break;
case KVM_CAP_S390_VECTOR_REGISTERS:
- if (MACHINE_HAS_VX) {
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (MACHINE_HAS_VX) {
set_kvm_facility(kvm->arch.model.fac->mask, 129);
set_kvm_facility(kvm->arch.model.fac->list, 129);
r = 0;
} else
r = -EINVAL;
+ mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)");
break;
kvm_s390_get_regs_rre(vcpu, ®1, ®2);
- if (!MACHINE_HAS_PFMF)
+ if (!test_kvm_facility(vcpu->kvm, 8))
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
u16 cpu_addr, u32 parameter, u64 *status_reg)
{
int rc;
- struct kvm_vcpu *dst_vcpu;
+ struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
- if (cpu_addr >= KVM_MAX_VCPUS)
- return SIGP_CC_NOT_OPERATIONAL;
-
- dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
if (order_code == SIGP_EXTERNAL_CALL) {
- dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
kvm_s390_vcpu_wakeup(dest_vcpu);
switch (type) {
case VMX_VPID_EXTENT_ALL_CONTEXT:
- if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
- nested_vmx_failValid(vcpu,
- VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return 1;
- }
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
nested_vmx_succeed(vcpu);
break;
return 0;
}
+static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+{
+ return (!lapic_in_kernel(vcpu) ||
+ kvm_apic_accept_pic_intr(vcpu));
+}
+
+/*
+ * if userspace requested an interrupt window, check that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_interrupt_allowed(vcpu) &&
+ !kvm_cpu_has_interrupt(vcpu) &&
+ !kvm_event_needs_reinjection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+}
+
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
return -EEXIST;
vcpu->arch.pending_external_vector = irq->irq;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
}
-/*
- * Check if userspace requested an interrupt window, and that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
- if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
- return false;
-
- if (kvm_cpu_has_interrupt(vcpu))
- return false;
-
- return (irqchip_split(vcpu->kvm)
- ? kvm_apic_accept_pic_intr(vcpu)
- : kvm_arch_interrupt_allowed(vcpu));
+ return vcpu->run->request_interrupt_window &&
+ likely(!pic_in_kernel(vcpu->kvm));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (!irqchip_in_kernel(vcpu->kvm))
- kvm_run->ready_for_interrupt_injection =
- kvm_arch_interrupt_allowed(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu) &&
- !kvm_event_needs_reinjection(vcpu);
- else if (!pic_in_kernel(vcpu->kvm))
- kvm_run->ready_for_interrupt_injection =
- kvm_apic_accept_pic_intr(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu);
- else
- kvm_run->ready_for_interrupt_injection = 1;
+ kvm_run->ready_for_interrupt_injection =
+ pic_in_kernel(vcpu->kvm) ||
+ kvm_vcpu_ready_for_interrupt_injection(vcpu);
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
- bool req_int_win = !lapic_in_kernel(vcpu) &&
- vcpu->run->request_interrupt_window;
+ bool req_int_win =
+ dm_request_for_irq_injection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+
bool req_immediate_exit = false;
if (vcpu->requests) {
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
- if (dm_request_for_irq_injection(vcpu)) {
+ if (dm_request_for_irq_injection(vcpu) &&
+ kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
r = 0;
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.request_irq_exits;
struct bio_vec bv, bvprv, *bvprvp = NULL;
struct bvec_iter iter;
unsigned seg_size = 0, nsegs = 0, sectors = 0;
+ unsigned front_seg_size = bio->bi_seg_front_size;
+ bool do_split = true;
+ struct bio *new = NULL;
bio_for_each_segment(bv, bio, iter) {
if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
seg_size += bv.bv_len;
bvprv = bv;
- bvprvp = &bv;
+ bvprvp = &bvprv;
sectors += bv.bv_len >> 9;
continue;
}
nsegs++;
bvprv = bv;
- bvprvp = &bv;
+ bvprvp = &bvprv;
seg_size = bv.bv_len;
sectors += bv.bv_len >> 9;
+
+ if (nsegs == 1 && seg_size > front_seg_size)
+ front_seg_size = seg_size;
}
- *segs = nsegs;
- return NULL;
+ do_split = false;
split:
*segs = nsegs;
- return bio_split(bio, sectors, GFP_NOIO, bs);
+
+ if (do_split) {
+ new = bio_split(bio, sectors, GFP_NOIO, bs);
+ if (new)
+ bio = new;
+ }
+
+ bio->bi_seg_front_size = front_seg_size;
+ if (seg_size > bio->bi_seg_back_size)
+ bio->bi_seg_back_size = seg_size;
+
+ return do_split ? new : NULL;
}
void blk_queue_split(struct request_queue *q, struct bio **bio,
if (sg)
sg_mark_end(sg);
+ /*
+ * Something must have been wrong if the figured number of
+ * segment is bigger than number of req's physical segments
+ */
+ WARN_ON(nsegs > rq->nr_phys_segments);
+
return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
blk_mq_bio_to_request(rq, bio);
/*
- * we do limited pluging. If bio can be merged, do merge.
+ * We do limited pluging. If the bio can be merged, do that.
* Otherwise the existing request in the plug list will be
* issued. So the plug list will have one request at most
*/
if (plug) {
/*
* The plug list might get flushed before this. If that
- * happens, same_queue_rq is invalid and plug list is empty
- **/
+ * happens, same_queue_rq is invalid and plug list is
+ * empty
+ */
if (same_queue_rq && !list_empty(&plug->mq_list)) {
old_rq = same_queue_rq;
list_del_init(&old_rq->queuelist);
blk_mq_bio_to_request(rq, bio);
if (!request_count)
trace_block_plug(q);
- else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+
+ blk_mq_put_ctx(data.ctx);
+
+ if (request_count >= BLK_MAX_REQUEST_COUNT) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
}
+
list_add_tail(&rq->queuelist, &plug->mq_list);
- blk_mq_put_ctx(data.ctx);
return cookie;
}
{
if (blk_mark_rq_complete(req))
return;
- blk_delete_timer(req);
- if (req->q->mq_ops)
+
+ if (req->q->mq_ops) {
blk_mq_rq_timed_out(req, false);
- else
+ } else {
+ blk_delete_timer(req);
blk_rq_timed_out(req);
+ }
}
EXPORT_SYMBOL_GPL(blk_abort_request);
static int noop_dispatch(struct request_queue *q, int force)
{
struct noop_data *nd = q->elevator->elevator_data;
+ struct request *rq;
- if (!list_empty(&nd->queue)) {
- struct request *rq;
- rq = list_entry(nd->queue.next, struct request, queuelist);
+ rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
+ if (rq) {
list_del_init(&rq->queuelist);
elv_dispatch_sort(q, rq);
return 1;
if (rq->queuelist.prev == &nd->queue)
return NULL;
- return list_entry(rq->queuelist.prev, struct request, queuelist);
+ return list_prev_entry(rq, queuelist);
}
static struct request *
if (rq->queuelist.next == &nd->queue)
return NULL;
- return list_entry(rq->queuelist.next, struct request, queuelist);
+ return list_next_entry(rq, queuelist);
}
static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
Sector sect;
unsigned char *data;
int slot, blocks_in_map;
- unsigned secsize;
+ unsigned secsize, datasize, partoffset;
#ifdef CONFIG_PPC_PMAC
int found_root = 0;
int found_root_goodness = 0;
}
secsize = be16_to_cpu(md->block_size);
put_dev_sector(sect);
- data = read_part_sector(state, secsize/512, §);
+ datasize = round_down(secsize, 512);
+ data = read_part_sector(state, datasize / 512, §);
if (!data)
return -1;
- part = (struct mac_partition *) (data + secsize%512);
+ partoffset = secsize % 512;
+ if (partoffset + sizeof(*part) > datasize)
+ return -1;
+ part = (struct mac_partition *) (data + partoffset);
if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
put_dev_sector(sect);
return 0; /* not a MacOS disk */
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
+obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
-obj-$(CONFIG_NVM) += lightnvm/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_TARGET_CORE) += target/
sector_t capacity;
unsigned int index = 0;
struct kobject *kobj;
- unsigned char thd_name[16];
if (dd->disk)
goto skip_create_disk; /* hw init done, before rebuild */
}
start_service_thread:
- sprintf(thd_name, "mtip_svc_thd_%02d", index);
dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
- dd, dd->numa_node, "%s",
- thd_name);
+ dd, dd->numa_node,
+ "mtip_svc_thd_%02d", index);
if (IS_ERR(dd->mtip_svc_handler)) {
dev_err(&dd->pdev->dev, "service thread failed to start\n");
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
+#include <linux/lightnvm.h>
struct nullb_cmd {
struct list_head list;
struct nullb_queue *queues;
unsigned int nr_queues;
+ char disk_name[DISK_NAME_LEN];
};
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
+static struct kmem_cache *ppa_cache;
struct completion_queue {
struct llist_head list;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+static bool use_lightnvm;
+module_param(use_lightnvm, bool, S_IRUGO);
+MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
+
static int irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
list_del_init(&nullb->list);
- del_gendisk(nullb->disk);
+ if (use_lightnvm)
+ nvm_unregister(nullb->disk_name);
+ else
+ del_gendisk(nullb->disk);
blk_cleanup_queue(nullb->q);
if (queue_mode == NULL_Q_MQ)
blk_mq_free_tag_set(&nullb->tag_set);
- put_disk(nullb->disk);
+ if (!use_lightnvm)
+ put_disk(nullb->disk);
cleanup_queues(nullb);
kfree(nullb);
}
+#ifdef CONFIG_NVM
+
+static void null_lnvm_end_io(struct request *rq, int error)
+{
+ struct nvm_rq *rqd = rq->end_io_data;
+ struct nvm_dev *dev = rqd->dev;
+
+ dev->mt->end_io(rqd, error);
+
+ blk_put_request(rq);
+}
+
+static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+{
+ struct request *rq;
+ struct bio *bio = rqd->bio;
+
+ rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+ if (IS_ERR(rq))
+ return -ENOMEM;
+
+ rq->cmd_type = REQ_TYPE_DRV_PRIV;
+ rq->__sector = bio->bi_iter.bi_sector;
+ rq->ioprio = bio_prio(bio);
+
+ if (bio_has_data(bio))
+ rq->nr_phys_segments = bio_phys_segments(q, bio);
+
+ rq->__data_len = bio->bi_iter.bi_size;
+ rq->bio = rq->biotail = bio;
+
+ rq->end_io_data = rqd;
+
+ blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);
+
+ return 0;
+}
+
+static int null_lnvm_id(struct request_queue *q, struct nvm_id *id)
+{
+ sector_t size = gb * 1024 * 1024 * 1024ULL;
+ sector_t blksize;
+ struct nvm_id_group *grp;
+
+ id->ver_id = 0x1;
+ id->vmnt = 0;
+ id->cgrps = 1;
+ id->cap = 0x3;
+ id->dom = 0x1;
+
+ id->ppaf.blk_offset = 0;
+ id->ppaf.blk_len = 16;
+ id->ppaf.pg_offset = 16;
+ id->ppaf.pg_len = 16;
+ id->ppaf.sect_offset = 32;
+ id->ppaf.sect_len = 8;
+ id->ppaf.pln_offset = 40;
+ id->ppaf.pln_len = 8;
+ id->ppaf.lun_offset = 48;
+ id->ppaf.lun_len = 8;
+ id->ppaf.ch_offset = 56;
+ id->ppaf.ch_len = 8;
+
+ do_div(size, bs); /* convert size to pages */
+ do_div(size, 256); /* concert size to pgs pr blk */
+ grp = &id->groups[0];
+ grp->mtype = 0;
+ grp->fmtype = 0;
+ grp->num_ch = 1;
+ grp->num_pg = 256;
+ blksize = size;
+ do_div(size, (1 << 16));
+ grp->num_lun = size + 1;
+ do_div(blksize, grp->num_lun);
+ grp->num_blk = blksize;
+ grp->num_pln = 1;
+
+ grp->fpg_sz = bs;
+ grp->csecs = bs;
+ grp->trdt = 25000;
+ grp->trdm = 25000;
+ grp->tprt = 500000;
+ grp->tprm = 500000;
+ grp->tbet = 1500000;
+ grp->tbem = 1500000;
+ grp->mpos = 0x010101; /* single plane rwe */
+ grp->cpar = hw_queue_depth;
+
+ return 0;
+}
+
+static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name)
+{
+ mempool_t *virtmem_pool;
+
+ virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
+ if (!virtmem_pool) {
+ pr_err("null_blk: Unable to create virtual memory pool\n");
+ return NULL;
+ }
+
+ return virtmem_pool;
+}
+
+static void null_lnvm_destroy_dma_pool(void *pool)
+{
+ mempool_destroy(pool);
+}
+
+static void *null_lnvm_dev_dma_alloc(struct request_queue *q, void *pool,
+ gfp_t mem_flags, dma_addr_t *dma_handler)
+{
+ return mempool_alloc(pool, mem_flags);
+}
+
+static void null_lnvm_dev_dma_free(void *pool, void *entry,
+ dma_addr_t dma_handler)
+{
+ mempool_free(entry, pool);
+}
+
+static struct nvm_dev_ops null_lnvm_dev_ops = {
+ .identity = null_lnvm_id,
+ .submit_io = null_lnvm_submit_io,
+
+ .create_dma_pool = null_lnvm_create_dma_pool,
+ .destroy_dma_pool = null_lnvm_destroy_dma_pool,
+ .dev_dma_alloc = null_lnvm_dev_dma_alloc,
+ .dev_dma_free = null_lnvm_dev_dma_free,
+
+ /* Simulate nvme protocol restriction */
+ .max_phys_sect = 64,
+};
+#else
+static struct nvm_dev_ops null_lnvm_dev_ops;
+#endif /* CONFIG_NVM */
+
static int null_open(struct block_device *bdev, fmode_t mode)
{
return 0;
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
- disk = nullb->disk = alloc_disk_node(1, home_node);
- if (!disk) {
- rv = -ENOMEM;
- goto out_cleanup_blk_queue;
- }
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
blk_queue_logical_block_size(nullb->q, bs);
blk_queue_physical_block_size(nullb->q, bs);
+ sprintf(nullb->disk_name, "nullb%d", nullb->index);
+
+ if (use_lightnvm) {
+ rv = nvm_register(nullb->q, nullb->disk_name,
+ &null_lnvm_dev_ops);
+ if (rv)
+ goto out_cleanup_blk_queue;
+ goto done;
+ }
+
+ disk = nullb->disk = alloc_disk_node(1, home_node);
+ if (!disk) {
+ rv = -ENOMEM;
+ goto out_cleanup_lightnvm;
+ }
size = gb * 1024 * 1024 * 1024ULL;
set_capacity(disk, size >> 9);
disk->fops = &null_fops;
disk->private_data = nullb;
disk->queue = nullb->q;
- sprintf(disk->disk_name, "nullb%d", nullb->index);
+ strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+
add_disk(disk);
+done:
return 0;
+out_cleanup_lightnvm:
+ if (use_lightnvm)
+ nvm_unregister(nullb->disk_name);
out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q);
out_cleanup_tags:
bs = PAGE_SIZE;
}
+ if (use_lightnvm && bs != 4096) {
+ pr_warn("null_blk: LightNVM only supports 4k block size\n");
+ pr_warn("null_blk: defaults block size to 4k\n");
+ bs = 4096;
+ }
+
+ if (use_lightnvm && queue_mode != NULL_Q_MQ) {
+ pr_warn("null_blk: LightNVM only supported for blk-mq\n");
+ pr_warn("null_blk: defaults queue mode to blk-mq\n");
+ queue_mode = NULL_Q_MQ;
+ }
+
if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
if (submit_queues < nr_online_nodes) {
pr_warn("null_blk: submit_queues param is set to %u.",
if (null_major < 0)
return null_major;
+ if (use_lightnvm) {
+ ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
+ 0, 0, NULL);
+ if (!ppa_cache) {
+ pr_err("null_blk: unable to create ppa cache\n");
+ return -ENOMEM;
+ }
+ }
+
for (i = 0; i < nr_devices; i++) {
if (null_add_dev()) {
unregister_blkdev(null_major, "nullb");
- return -EINVAL;
+ goto err_ppa;
}
}
pr_info("null: module loaded\n");
return 0;
+err_ppa:
+ kmem_cache_destroy(ppa_cache);
+ return -EINVAL;
}
static void __exit null_exit(void)
null_del_dev(nullb);
}
mutex_unlock(&lock);
+
+ kmem_cache_destroy(ppa_cache);
}
module_init(null_init);
}
EXPORT_SYMBOL(nvm_erase_blk);
-static void nvm_core_free(struct nvm_dev *dev)
-{
- kfree(dev);
-}
-
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
- dev->addr_mode = id->ppat;
- dev->addr_format = id->ppaf;
+ memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
dev->plane_mode = NVM_PLANE_SINGLE;
dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
+ if (grp->mtype != 0) {
+ pr_err("nvm: memory type not supported\n");
+ return -EINVAL;
+ }
+
+ if (grp->fmtype != 0 && grp->fmtype != 1) {
+ pr_err("nvm: flash type not supported\n");
+ return -EINVAL;
+ }
+
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
if (dev->mt)
dev->mt->unregister_mgr(dev);
-
- nvm_core_free(dev);
}
static int nvm_init(struct nvm_dev *dev)
{
struct nvmm_type *mt;
- int ret = 0;
+ int ret = -EINVAL;
if (!dev->q || !dev->ops)
- return -EINVAL;
+ return ret;
if (dev->ops->identity(dev->q, &dev->identity)) {
pr_err("nvm: device could not be identified\n");
- ret = -EINVAL;
goto err;
}
dev->nr_chnls);
return 0;
err:
- nvm_free(dev);
pr_err("nvm: failed to initialize nvm\n");
return ret;
}
if (ret)
goto err_init;
- down_write(&nvm_lock);
- list_add(&dev->devices, &nvm_devices);
- up_write(&nvm_lock);
-
if (dev->ops->max_phys_sect > 1) {
dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
"ppalist");
if (!dev->ppalist_pool) {
pr_err("nvm: could not create ppa pool\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_init;
}
} else if (dev->ops->max_phys_sect > 256) {
pr_info("nvm: max sectors supported is 256.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_init;
}
+ down_write(&nvm_lock);
+ list_add(&dev->devices, &nvm_devices);
+ up_write(&nvm_lock);
+
return 0;
err_init:
kfree(dev);
return;
}
- nvm_exit(dev);
-
down_write(&nvm_lock);
list_del(&dev->devices);
up_write(&nvm_lock);
+
+ nvm_exit(dev);
+ kfree(dev);
}
EXPORT_SYMBOL(nvm_unregister);
lockdep_assert_held(&nvm_lock);
del_gendisk(tdisk);
+ blk_cleanup_queue(q);
+
if (tt->exit)
tt->exit(tdisk->private_data);
- blk_cleanup_queue(q);
-
put_disk(tdisk);
list_del(&t->list);
if (!dev->mt)
return 0;
- dev->mt->free_blocks_print(dev);
+ dev->mt->lun_info_print(dev);
return 0;
}
lun->vlun.lun_id = i % dev->luns_per_chnl;
lun->vlun.chnl_id = i / dev->luns_per_chnl;
lun->vlun.nr_free_blocks = dev->blks_per_lun;
+ lun->vlun.nr_inuse_blocks = 0;
+ lun->vlun.nr_bad_blocks = 0;
}
return 0;
}
-static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
+static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
void *private)
{
struct gen_nvm *gn = private;
- struct gen_lun *lun = &gn->luns[lun_id];
+ struct nvm_dev *dev = gn->dev;
+ struct gen_lun *lun;
struct nvm_block *blk;
int i;
- if (unlikely(bitmap_empty(bb_bitmap, nr_blocks)))
- return 0;
+ ppa = dev_to_generic_addr(gn->dev, ppa);
+ lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun];
+
+ for (i = 0; i < nr_blocks; i++) {
+ if (blks[i] == 0)
+ continue;
- i = -1;
- while ((i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) < nr_blocks) {
blk = &lun->vlun.blocks[i];
if (!blk) {
pr_err("gennvm: BB data is out of bounds.\n");
}
list_move_tail(&blk->list, &lun->bb_list);
+ lun->vlun.nr_bad_blocks++;
}
return 0;
list_move_tail(&blk->list, &lun->used_list);
blk->type = 1;
lun->vlun.nr_free_blocks--;
+ lun->vlun.nr_inuse_blocks++;
}
}
block->id = cur_block_id++;
/* First block is reserved for device */
- if (unlikely(lun_iter == 0 && blk_iter == 0))
+ if (unlikely(lun_iter == 0 && blk_iter == 0)) {
+ lun->vlun.nr_free_blocks--;
continue;
+ }
list_add_tail(&block->list, &lun->free_list);
}
if (dev->ops->get_bb_tbl) {
- ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id,
- dev->blks_per_lun, gennvm_block_bb, gn);
+ struct ppa_addr ppa;
+
+ ppa.ppa = 0;
+ ppa.g.ch = lun->vlun.chnl_id;
+ ppa.g.lun = lun->vlun.id;
+ ppa = generic_to_dev_addr(dev, ppa);
+
+ ret = dev->ops->get_bb_tbl(dev->q, ppa,
+ dev->blks_per_lun,
+ gennvm_block_bb, gn);
if (ret)
pr_err("gennvm: could not read BB table\n");
}
if (!gn)
return -ENOMEM;
+ gn->dev = dev;
gn->nr_luns = dev->nr_luns;
dev->mp = gn;
blk->type = 1;
lun->vlun.nr_free_blocks--;
+ lun->vlun.nr_inuse_blocks++;
spin_unlock(&vlun->lock);
out:
case 1:
list_move_tail(&blk->list, &lun->free_list);
lun->vlun.nr_free_blocks++;
+ lun->vlun.nr_inuse_blocks--;
blk->type = 0;
break;
case 2:
list_move_tail(&blk->list, &lun->bb_list);
+ lun->vlun.nr_bad_blocks++;
+ lun->vlun.nr_inuse_blocks--;
break;
default:
WARN_ON_ONCE(1);
pr_err("gennvm: erroneous block type (%lu -> %u)\n",
blk->id, blk->type);
list_move_tail(&blk->list, &lun->bb_list);
+ lun->vlun.nr_bad_blocks++;
+ lun->vlun.nr_inuse_blocks--;
}
spin_unlock(&vlun->lock);
if (rqd->nr_pages > 1) {
for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = addr_to_generic_mode(dev,
+ rqd->ppa_list[i] = dev_to_generic_addr(dev,
rqd->ppa_list[i]);
} else {
- rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr);
+ rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
}
}
if (rqd->nr_pages > 1) {
for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = generic_to_addr_mode(dev,
+ rqd->ppa_list[i] = generic_to_dev_addr(dev,
rqd->ppa_list[i]);
} else {
- rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr);
+ rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
}
}
{
int i;
- if (!dev->ops->set_bb)
+ if (!dev->ops->set_bb_tbl)
return;
- if (dev->ops->set_bb(dev->q, rqd, 1))
+ if (dev->ops->set_bb_tbl(dev->q, rqd, 1))
return;
gennvm_addr_to_generic_mode(dev, rqd);
return &gn->luns[lunid].vlun;
}
-static void gennvm_free_blocks_print(struct nvm_dev *dev)
+static void gennvm_lun_info_print(struct nvm_dev *dev)
{
struct gen_nvm *gn = dev->mp;
struct gen_lun *lun;
unsigned int i;
- gennvm_for_each_lun(gn, lun, i)
- pr_info("%s: lun%8u\t%u\n",
- dev->name, i, lun->vlun.nr_free_blocks);
+
+ gennvm_for_each_lun(gn, lun, i) {
+ spin_lock(&lun->vlun.lock);
+
+ pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+ dev->name, i,
+ lun->vlun.nr_free_blocks,
+ lun->vlun.nr_inuse_blocks,
+ lun->vlun.nr_bad_blocks);
+
+ spin_unlock(&lun->vlun.lock);
+ }
}
static struct nvmm_type gennvm = {
.erase_blk = gennvm_erase_blk,
.get_lun = gennvm_get_lun,
- .free_blocks_print = gennvm_free_blocks_print,
+ .lun_info_print = gennvm_lun_info_print,
};
static int __init gennvm_module_init(void)
};
struct gen_nvm {
+ struct nvm_dev *dev;
+
int nr_luns;
struct gen_lun *luns;
};
return blk->id * rrpc->dev->pgs_per_blk;
}
+static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
+ struct ppa_addr r)
+{
+ struct ppa_addr l;
+ int secs, pgs, blks, luns;
+ sector_t ppa = r.ppa;
+
+ l.ppa = 0;
+
+ div_u64_rem(ppa, dev->sec_per_pg, &secs);
+ l.g.sec = secs;
+
+ sector_div(ppa, dev->sec_per_pg);
+ div_u64_rem(ppa, dev->sec_per_blk, &pgs);
+ l.g.pg = pgs;
+
+ sector_div(ppa, dev->pgs_per_blk);
+ div_u64_rem(ppa, dev->blks_per_lun, &blks);
+ l.g.blk = blks;
+
+ sector_div(ppa, dev->blks_per_lun);
+ div_u64_rem(ppa, dev->luns_per_chnl, &luns);
+ l.g.lun = luns;
+
+ sector_div(ppa, dev->luns_per_chnl);
+ l.g.ch = ppa;
+
+ return l;
+}
+
static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
{
struct ppa_addr paddr;
paddr.ppa = addr;
- return __linear_to_generic_addr(dev, paddr);
+ return linear_to_generic_addr(dev, paddr);
}
/* requires lun->lock taken */
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
- DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
+ DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
+ DM_CRYPT_EXIT_THREAD};
/*
* The fields in here must be read only after initialization.
if (!RB_EMPTY_ROOT(&cc->write_tree))
goto pop_from_list;
+ if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
+ spin_unlock_irq(&cc->write_thread_wait.lock);
+ break;
+ }
+
__set_current_state(TASK_INTERRUPTIBLE);
__add_wait_queue(&cc->write_thread_wait, &wait);
spin_unlock_irq(&cc->write_thread_wait.lock);
- if (unlikely(kthread_should_stop())) {
- set_task_state(current, TASK_RUNNING);
- remove_wait_queue(&cc->write_thread_wait, &wait);
- break;
- }
-
schedule();
- set_task_state(current, TASK_RUNNING);
spin_lock_irq(&cc->write_thread_wait.lock);
__remove_wait_queue(&cc->write_thread_wait, &wait);
goto continue_locked;
if (!cc)
return;
- if (cc->write_thread)
+ if (cc->write_thread) {
+ spin_lock_irq(&cc->write_thread_wait.lock);
+ set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
+ wake_up_locked(&cc->write_thread_wait);
+ spin_unlock_irq(&cc->write_thread_wait.lock);
kthread_stop(cc->write_thread);
+ }
if (cc->io_queue)
destroy_workqueue(cc->io_queue);
struct block_device **bdev, fmode_t *mode)
{
struct multipath *m = ti->private;
- struct pgpath *pgpath;
unsigned long flags;
int r;
- r = 0;
-
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pgpath)
__choose_pgpath(m, 0);
- pgpath = m->current_pgpath;
-
- if (pgpath) {
- *bdev = pgpath->path.dev->bdev;
- *mode = pgpath->path.dev->mode;
+ if (m->current_pgpath) {
+ if (!m->queue_io) {
+ *bdev = m->current_pgpath->path.dev->bdev;
+ *mode = m->current_pgpath->path.dev->mode;
+ r = 0;
+ } else {
+ /* pg_init has not started or completed */
+ r = -ENOTCONN;
+ }
+ } else {
+ /* No path is available */
+ if (m->queue_if_no_path)
+ r = -ENOTCONN;
+ else
+ r = -EIO;
}
- if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
- r = -ENOTCONN;
- else if (!*bdev)
- r = -EIO;
-
spin_unlock_irqrestore(&m->lock, flags);
- if (r == -ENOTCONN && !fatal_signal_pending(current)) {
+ if (r == -ENOTCONN) {
spin_lock_irqsave(&m->lock, flags);
if (!m->current_pg) {
/* Path status changed, redo selection */
case PM_WRITE:
if (old_mode != new_mode)
notify_of_pool_mode_change(pool, "write");
+ pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
dm_pool_metadata_read_write(pool->pmd);
pool->process_bio = process_bio;
pool->process_discard = process_discard_bio;
{
struct thin_c *tc = ti->private;
struct pool *pool = tc->pool;
- struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md);
- if (!pool_limits->discard_granularity)
- return; /* pool's discard support is disabled */
+ if (!pool->pf.discard_enabled)
+ return;
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */
out:
dm_put_live_table(md, *srcu_idx);
- if (r == -ENOTCONN) {
+ if (r == -ENOTCONN && !fatal_signal_pending(current)) {
msleep(10);
goto retry;
}
{
struct mapped_device *md = bdev->bd_disk->private_data;
struct dm_target *tgt;
+ struct block_device *tgt_bdev = NULL;
int srcu_idx, r;
- r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+ r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx);
if (r < 0)
return r;
goto out;
}
- r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ r = __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg);
out:
dm_put_live_table(md, srcu_idx);
return r;
__le16 cdw14[6];
};
-struct nvme_nvm_bbtbl {
+struct nvme_nvm_getbbtbl {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u64 rsvd[2];
__le64 prp1;
__le64 prp2;
- __le32 prp1_len;
- __le32 prp2_len;
- __le32 lbb;
- __u32 rsvd11[3];
+ __le64 spba;
+ __u32 rsvd4[4];
+};
+
+struct nvme_nvm_setbbtbl {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le64 rsvd[2];
+ __le64 prp1;
+ __le64 prp2;
+ __le64 spba;
+ __le16 nlb;
+ __u8 value;
+ __u8 rsvd3;
+ __u32 rsvd4[3];
};
struct nvme_nvm_erase_blk {
struct nvme_nvm_hb_rw hb_rw;
struct nvme_nvm_ph_rw ph_rw;
struct nvme_nvm_l2ptbl l2p;
- struct nvme_nvm_bbtbl get_bb;
- struct nvme_nvm_bbtbl set_bb;
+ struct nvme_nvm_getbbtbl get_bb;
+ struct nvme_nvm_setbbtbl set_bb;
struct nvme_nvm_erase_blk erase;
};
};
__u8 num_ch;
__u8 num_lun;
__u8 num_pln;
+ __u8 rsvd1;
__le16 num_blk;
__le16 num_pg;
__le16 fpg_sz;
__le16 csecs;
__le16 sos;
+ __le16 rsvd2;
__le32 trdt;
__le32 trdm;
__le32 tprt;
__le32 tbet;
__le32 tbem;
__le32 mpos;
+ __le32 mccap;
__le16 cpar;
- __u8 reserved[913];
+ __u8 reserved[906];
} __packed;
struct nvme_nvm_addr_format {
__u8 ver_id;
__u8 vmnt;
__u8 cgrps;
- __u8 res[5];
+ __u8 res;
__le32 cap;
__le32 dom;
struct nvme_nvm_addr_format ppaf;
- __u8 ppat;
- __u8 resv[223];
+ __u8 resv[228];
struct nvme_nvm_id_group groups[4];
} __packed;
+struct nvme_nvm_bb_tbl {
+ __u8 tblid[4];
+ __le16 verid;
+ __le16 revid;
+ __le32 rvsd1;
+ __le32 tblks;
+ __le32 tfact;
+ __le32 tgrown;
+ __le32 tdresv;
+ __le32 thresv;
+ __le32 rsvd2[8];
+ __u8 blk[0];
+};
+
/*
* Check we didn't inadvertently grow the command struct
*/
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+ BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
}
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
dst->tbet = le32_to_cpu(src->tbet);
dst->tbem = le32_to_cpu(src->tbem);
dst->mpos = le32_to_cpu(src->mpos);
+ dst->mccap = le32_to_cpu(src->mccap);
dst->cpar = le16_to_cpu(src->cpar);
}
static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
{
struct nvme_ns *ns = q->queuedata;
+ struct nvme_dev *dev = ns->dev;
struct nvme_nvm_id *nvme_nvm_id;
struct nvme_nvm_command c = {};
int ret;
if (!nvme_nvm_id)
return -ENOMEM;
- ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, nvme_nvm_id,
- sizeof(struct nvme_nvm_id));
+ ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ nvme_nvm_id, sizeof(struct nvme_nvm_id));
if (ret) {
ret = -EIO;
goto out;
nvm_id->cgrps = nvme_nvm_id->cgrps;
nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
+ memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
+ sizeof(struct nvme_nvm_addr_format));
ret = init_grps(nvm_id, nvme_nvm_id);
out:
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
- u32 len = queue_max_hw_sectors(q) << 9;
+ u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
u32 nlb_pr_rq = len / sizeof(u64);
u64 cmd_slba = slba;
void *entries;
c.l2p.slba = cpu_to_le64(cmd_slba);
c.l2p.nlb = cpu_to_le32(cmd_nlb);
- ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c,
- entries, len);
+ ret = nvme_submit_sync_cmd(dev->admin_q,
+ (struct nvme_command *)&c, entries, len);
if (ret) {
dev_err(dev->dev, "L2P table transfer failed (%d)\n",
ret);
return ret;
}
-static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid,
- unsigned int nr_blocks,
- nvm_bb_update_fn *update_bbtbl, void *priv)
+static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa,
+ int nr_blocks, nvm_bb_update_fn *update_bbtbl,
+ void *priv)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
- void *bb_bitmap;
- u16 bb_bitmap_size;
+ struct nvme_nvm_bb_tbl *bb_tbl;
+ int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
int ret = 0;
c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
c.get_bb.nsid = cpu_to_le32(ns->ns_id);
- c.get_bb.lbb = cpu_to_le32(lunid);
- bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE;
- bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL);
- if (!bb_bitmap)
- return -ENOMEM;
+ c.get_bb.spba = cpu_to_le64(ppa.ppa);
- bitmap_zero(bb_bitmap, nr_blocks);
+ bb_tbl = kzalloc(tblsz, GFP_KERNEL);
+ if (!bb_tbl)
+ return -ENOMEM;
- ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap,
- bb_bitmap_size);
+ ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ bb_tbl, tblsz);
if (ret) {
dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
ret = -EIO;
goto out;
}
- ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv);
+ if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
+ bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
+ dev_err(dev->dev, "bbt format mismatch\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (le16_to_cpu(bb_tbl->verid) != 1) {
+ ret = -EINVAL;
+ dev_err(dev->dev, "bbt version not supported\n");
+ goto out;
+ }
+
+ if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
+ ret = -EINVAL;
+ dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
+ le32_to_cpu(bb_tbl->tblks), nr_blocks);
+ goto out;
+ }
+
+ ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
if (ret) {
ret = -EINTR;
goto out;
}
out:
- kfree(bb_bitmap);
+ kfree(bb_tbl);
+ return ret;
+}
+
+static int nvme_nvm_set_bb_tbl(struct request_queue *q, struct nvm_rq *rqd,
+ int type)
+{
+ struct nvme_ns *ns = q->queuedata;
+ struct nvme_dev *dev = ns->dev;
+ struct nvme_nvm_command c = {};
+ int ret = 0;
+
+ c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
+ c.set_bb.nsid = cpu_to_le32(ns->ns_id);
+ c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
+ c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
+ c.set_bb.value = type;
+
+ ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ NULL, 0);
+ if (ret)
+ dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
return ret;
}
.get_l2p_tbl = nvme_nvm_get_l2p_tbl,
.get_bb_tbl = nvme_nvm_get_bb_tbl,
+ .set_bb_tbl = nvme_nvm_set_bb_tbl,
.submit_io = nvme_nvm_submit_io,
.erase_block = nvme_nvm_erase_block,
goto retry_cmd;
}
if (blk_integrity_rq(req)) {
- if (blk_rq_count_integrity_sg(req->q, req->bio) != 1)
+ if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
goto error_cmd;
+ }
sg_init_table(iod->meta_sg, 1);
if (blk_rq_map_integrity_sg(
- req->q, req->bio, iod->meta_sg) != 1)
+ req->q, req->bio, iod->meta_sg) != 1) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
goto error_cmd;
+ }
if (rq_data_dir(req))
nvme_dif_remap(req, nvme_dif_prep);
- if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir))
+ if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
goto error_cmd;
+ }
}
}
if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
return;
- writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+ if (likely(nvmeq->cq_vector >= 0))
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
nvmeq->cq_head = head;
nvmeq->cq_phase = phase;
u32 aqa;
u64 cap = lo_hi_readq(&dev->bar->cap);
struct nvme_queue *nvmeq;
- unsigned page_shift = PAGE_SHIFT;
+ /*
+ * default to a 4K page size, with the intention to update this
+ * path in the future to accomodate architectures with differing
+ * kernel and IO page sizes.
+ */
+ unsigned page_shift = 12;
unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
- unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
if (page_shift < dev_page_min) {
dev_err(dev->dev,
1 << page_shift);
return -ENODEV;
}
- if (page_shift > dev_page_max) {
- dev_info(dev->dev,
- "Device maximum page size (%u) smaller than "
- "host (%u); enabling work-around\n",
- 1 << dev_page_max, 1 << page_shift);
- page_shift = dev_page_max;
- }
dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
NVME_CAP_NSSRC(cap) : 0;
if (dev->max_hw_sectors) {
blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
blk_queue_max_segments(ns->queue,
- ((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+ (dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
}
if (dev->stripe_size)
blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
{
struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
nvme_put_dq(dq);
+
+ spin_lock_irq(&nvmeq->q_lock);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
}
static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
*/
static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
{
+ /*
+ * Check for signal early to make process killable when there are
+ * always buffers available
+ */
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
while (!pipe->nrbufs) {
if (!pipe->writers)
return 0;
splice_from_pipe_begin(sd);
do {
+ cond_resched();
ret = splice_from_pipe_next(pipe, sd);
if (ret > 0)
ret = splice_from_pipe_feed(pipe, sd, actor);
inode->i_fop = &sysv_dir_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISLNK(inode->i_mode)) {
- if (inode->i_blocks) {
- inode->i_op = &sysv_symlink_inode_operations;
- inode->i_mapping->a_ops = &sysv_aops;
- } else {
- inode->i_op = &simple_symlink_inode_operations;
- inode->i_link = (char *)SYSV_I(inode)->i_data;
- nd_terminate_link(inode->i_link, inode->i_size,
- sizeof(SYSV_I(inode)->i_data) - 1);
- }
+ inode->i_op = &sysv_symlink_inode_operations;
+ inode->i_mapping->a_ops = &sysv_aops;
} else
init_special_inode(inode, inode->i_mode, rdev);
}
struct irq_phys_map *map, bool level);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
int virt_irq, int irq);
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
(vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
idx++)
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ if (vcpu->vcpu_id == id)
+ return vcpu;
+ return NULL;
+}
+
#define kvm_for_each_memslot(memslot, slots) \
for (memslot = &slots->memslots[0]; \
memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
struct nvm_id_group {
u8 mtype;
u8 fmtype;
- u16 res16;
u8 num_ch;
u8 num_lun;
u8 num_pln;
u32 tbet;
u32 tbem;
u32 mpos;
+ u32 mccap;
u16 cpar;
- u8 res[913];
-} __packed;
+};
struct nvm_addr_format {
u8 ch_offset;
u8 pg_len;
u8 sect_offset;
u8 sect_len;
- u8 res[4];
};
struct nvm_id {
u8 ver_id;
u8 vmnt;
u8 cgrps;
- u8 res[5];
u32 cap;
u32 dom;
struct nvm_addr_format ppaf;
- u8 ppat;
- u8 resv[224];
struct nvm_id_group groups[4];
} __packed;
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS (6)
-#define NVM_PG_BITS (16)
#define NVM_BLK_BITS (16)
-#define NVM_LUN_BITS (10)
+#define NVM_PG_BITS (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS (8)
+#define NVM_LUN_BITS (8)
#define NVM_CH_BITS (8)
struct ppa_addr {
+ /* Generic structure for all addresses */
union {
- /* Channel-based PPA format in nand 4x2x2x2x8x10 */
- struct {
- u64 ch : 4;
- u64 sec : 2; /* 4 sectors per page */
- u64 pl : 2; /* 4 planes per LUN */
- u64 lun : 2; /* 4 LUNs per channel */
- u64 pg : 8; /* 256 pages per block */
- u64 blk : 10;/* 1024 blocks per plane */
- u64 resved : 36;
- } chnl;
-
- /* Generic structure for all addresses */
struct {
+ u64 blk : NVM_BLK_BITS;
+ u64 pg : NVM_PG_BITS;
u64 sec : NVM_SEC_BITS;
u64 pl : NVM_PL_BITS;
- u64 pg : NVM_PG_BITS;
- u64 blk : NVM_BLK_BITS;
u64 lun : NVM_LUN_BITS;
u64 ch : NVM_CH_BITS;
} g;
u64 ppa;
};
-} __packed;
+};
struct nvm_rq {
struct nvm_tgt_instance *ins;
struct nvm_block;
typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
+typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int,
nvm_bb_update_fn *, void *);
typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
nvm_id_fn *identity;
nvm_get_l2p_tbl_fn *get_l2p_tbl;
nvm_op_bb_tbl_fn *get_bb_tbl;
- nvm_op_set_bb_fn *set_bb;
+ nvm_op_set_bb_fn *set_bb_tbl;
nvm_submit_io_fn *submit_io;
nvm_erase_blk_fn *erase_block;
nvm_dev_dma_alloc_fn *dev_dma_alloc;
nvm_dev_dma_free_fn *dev_dma_free;
- uint8_t max_phys_sect;
+ unsigned int max_phys_sect;
};
struct nvm_lun {
int lun_id;
int chnl_id;
+ unsigned int nr_inuse_blocks; /* Number of used blocks */
unsigned int nr_free_blocks; /* Number of unused blocks */
+ unsigned int nr_bad_blocks; /* Number of bad blocks */
struct nvm_block *blocks;
spinlock_t lock;
int blks_per_lun;
int sec_size;
int oob_size;
- int addr_mode;
- struct nvm_addr_format addr_format;
+ struct nvm_addr_format ppaf;
/* Calculated/Cached values. These do not reflect the actual usable
* blocks at run-time.
char name[DISK_NAME_LEN];
};
-/* fallback conversion */
-static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
- struct ppa_addr r)
-{
- struct ppa_addr l;
-
- l.ppa = r.g.sec +
- r.g.pg * dev->sec_per_pg +
- r.g.blk * (dev->pgs_per_blk *
- dev->sec_per_pg) +
- r.g.lun * (dev->blks_per_lun *
- dev->pgs_per_blk *
- dev->sec_per_pg) +
- r.g.ch * (dev->blks_per_lun *
- dev->pgs_per_blk *
- dev->luns_per_chnl *
- dev->sec_per_pg);
-
- return l;
-}
-
-/* fallback conversion */
-static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
- struct ppa_addr r)
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
+ struct ppa_addr r)
{
struct ppa_addr l;
- int secs, pgs, blks, luns;
- sector_t ppa = r.ppa;
- l.ppa = 0;
-
- div_u64_rem(ppa, dev->sec_per_pg, &secs);
- l.g.sec = secs;
-
- sector_div(ppa, dev->sec_per_pg);
- div_u64_rem(ppa, dev->sec_per_blk, &pgs);
- l.g.pg = pgs;
-
- sector_div(ppa, dev->pgs_per_blk);
- div_u64_rem(ppa, dev->blks_per_lun, &blks);
- l.g.blk = blks;
-
- sector_div(ppa, dev->blks_per_lun);
- div_u64_rem(ppa, dev->luns_per_chnl, &luns);
- l.g.lun = luns;
-
- sector_div(ppa, dev->luns_per_chnl);
- l.g.ch = ppa;
+ l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
+ l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
+ l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
+ l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
+ l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
+ l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
return l;
}
-static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
+ struct ppa_addr r)
{
struct ppa_addr l;
- l.ppa = 0;
-
- l.chnl.sec = r.g.sec;
- l.chnl.pl = r.g.pl;
- l.chnl.pg = r.g.pg;
- l.chnl.blk = r.g.blk;
- l.chnl.lun = r.g.lun;
- l.chnl.ch = r.g.ch;
-
- return l;
-}
-
-static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
-{
- struct ppa_addr l;
-
- l.ppa = 0;
-
- l.g.sec = r.chnl.sec;
- l.g.pl = r.chnl.pl;
- l.g.pg = r.chnl.pg;
- l.g.blk = r.chnl.blk;
- l.g.lun = r.chnl.lun;
- l.g.ch = r.chnl.ch;
+ /*
+ * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
+ */
+ l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
+ (((1 << dev->ppaf.blk_len) - 1));
+ l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
+ (((1 << dev->ppaf.pg_len) - 1));
+ l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
+ (((1 << dev->ppaf.sect_len) - 1));
+ l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
+ (((1 << dev->ppaf.pln_len) - 1));
+ l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
+ (((1 << dev->ppaf.lun_len) - 1));
+ l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
+ (((1 << dev->ppaf.ch_len) - 1));
return l;
}
-static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
- struct ppa_addr gppa)
-{
- switch (dev->addr_mode) {
- case NVM_ADDRMODE_LINEAR:
- return __linear_to_generic_addr(dev, gppa);
- case NVM_ADDRMODE_CHANNEL:
- return __chnl_to_generic_addr(gppa);
- default:
- BUG();
- }
- return gppa;
-}
-
-static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
- struct ppa_addr gppa)
-{
- switch (dev->addr_mode) {
- case NVM_ADDRMODE_LINEAR:
- return __generic_to_linear_addr(dev, gppa);
- case NVM_ADDRMODE_CHANNEL:
- return __generic_to_chnl_addr(gppa);
- default:
- BUG();
- }
- return gppa;
-}
-
static inline int ppa_empty(struct ppa_addr ppa_addr)
{
return (ppa_addr.ppa == ADDR_EMPTY);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
+typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
struct nvmm_type {
const char *name;
nvmm_get_lun_fn *get_lun;
/* Statistics */
- nvmm_free_blocks_print_fn *free_blocks_print;
+ nvmm_lun_info_print_fn *lun_info_print;
struct list_head list;
};
rcu_read_lock();
if (type != PIDTYPE_PID)
task = task->group_leader;
- pid = get_pid(task->pids[type].pid);
+ pid = get_pid(rcu_dereference(task->pids[type].pid));
rcu_read_unlock();
return pid;
}
if (likely(pid_alive(task))) {
if (type != PIDTYPE_PID)
task = task->group_leader;
- nr = pid_nr_ns(task->pids[type].pid, ns);
+ nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
}
rcu_read_unlock();
o Where possible, any helper functions or other package-wide code shall be
implemented in header files, avoiding the need to compile intermediate object
files.
-o External dependendencies shall remain as minimal as possible. Currently gcc
+o External dependencies shall remain as minimal as possible. Currently gcc
and glibc are the only dependencies.
o Tests return 0 for success and < 0 for failure.
pid_t parent = getppid();
int fd;
void *map1, *map2;
+ int page_size = sysconf(_SC_PAGESIZE);
+
+ ASSERT_LT(0, page_size);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
map1 = (void *)syscall(sysno,
- NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, PAGE_SIZE);
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
EXPECT_NE(MAP_FAILED, map1);
/* mmap2() should never return. */
map2 = (void *)syscall(sysno,
- NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
EXPECT_EQ(MAP_FAILED, map2);
/* The test failed, so clean up the resources. */
- munmap(map1, PAGE_SIZE);
- munmap(map2, PAGE_SIZE);
+ munmap(map1, page_size);
+ munmap(map2, page_size);
close(fd);
}
kvm_timer_update_state(vcpu);
/*
- * If we enter the guest with the virtual input level to the VGIC
- * asserted, then we have already told the VGIC what we need to, and
- * we don't need to exit from the guest until the guest deactivates
- * the already injected interrupt, so therefore we should set the
- * hardware active state to prevent unnecessary exits from the guest.
- *
- * Conversely, if the virtual input level is deasserted, then always
- * clear the hardware active state to ensure that hardware interrupts
- * from the timer triggers a guest exit.
- */
- if (timer->irq.level)
+ * If we enter the guest with the virtual input level to the VGIC
+ * asserted, then we have already told the VGIC what we need to, and
+ * we don't need to exit from the guest until the guest deactivates
+ * the already injected interrupt, so therefore we should set the
+ * hardware active state to prevent unnecessary exits from the guest.
+ *
+ * Also, if we enter the guest with the virtual timer interrupt active,
+ * then it must be active on the physical distributor, because we set
+ * the HW bit and the guest must be able to deactivate the virtual and
+ * physical interrupt at the same time.
+ *
+ * Conversely, if the virtual input level is deasserted and the virtual
+ * interrupt is not active, then always clear the hardware active state
+ * to ensure that hardware interrupts from the timer triggers a guest
+ * exit.
+ */
+ if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
phys_active = true;
else
phys_active = false;
vgic_set_lr(vcpu, lr_nr, vlr);
}
+static bool dist_active_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+{
+ int i;
+
+ for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, i);
+
+ if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE)
+ return true;
+ }
+
+ return dist_active_irq(vcpu);
+}
+
/*
* An interrupt may have been disabled after being made pending on the
* CPU interface (the classic case is a timer running while we're
* may have been serviced from another vcpu. In all cases,
* move along.
*/
- if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
+ if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu))
goto epilog;
/* SGIs */
static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- struct irq_phys_map *map;
- bool phys_active;
bool level_pending;
- int ret;
if (!(vlr.state & LR_HW))
return false;
- map = vgic_irq_map_search(vcpu, vlr.irq);
- BUG_ON(!map);
-
- ret = irq_get_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- &phys_active);
-
- WARN_ON(ret);
-
- if (phys_active)
- return 0;
+ if (vlr.state & LR_STATE_ACTIVE)
+ return false;
spin_lock(&dist->lock);
level_pending = process_queued_irq(vcpu, lr, vlr);
return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
}
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
- return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
-}
-
-
void vgic_kick_vcpus(struct kvm *kvm)
{
struct kvm_vcpu *vcpu;