Merge tag 'for-linus-4.8-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Jul 2016 18:35:37 +0000 (11:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Jul 2016 18:35:37 +0000 (11:35 -0700)
Pull xen updates from David Vrabel:
 "Features and fixes for 4.8-rc0:

   - ACPI support for guests on ARM platforms.
   - Generic steal time support for arm and x86.
   - Support cases where the kernel CPU number is not the Xen vCPU
     number (e.g., when in-guest kexec is used); the new per-cpu
     mapping is sketched after the commit list below.
   - Use the system workqueue instead of a custom workqueue in various
     places"

* tag 'for-linus-4.8-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (47 commits)
  xen: add static initialization of steal_clock op to xen_time_ops
  xen/pvhvm: run xen_vcpu_setup() for the boot CPU
  xen/evtchn: use xen_vcpu_id mapping
  xen/events: fifo: use xen_vcpu_id mapping
  xen/events: use xen_vcpu_id mapping in events_base
  x86/xen: use xen_vcpu_id mapping when pointing vcpu_info to shared_info
  x86/xen: use xen_vcpu_id mapping for HYPERVISOR_vcpu_op
  xen: introduce xen_vcpu_id mapping
  x86/acpi: store ACPI ids from MADT for future usage
  x86/xen: update cpuid.h from Xen-4.7
  xen/evtchn: add IOCTL_EVTCHN_RESTRICT
  xen-blkback: really don't leak mode property
  xen-blkback: constify instance of "struct attribute_group"
  xen-blkfront: prefer xenbus_scanf() over xenbus_gather()
  xen-blkback: prefer xenbus_scanf() over xenbus_gather()
  xen: support runqueue steal time on xen
  arm/xen: add support for vm_assist hypercall
  xen: update xen headers
  xen-pciback: drop superfluous variables
  xen-pciback: short-circuit read path used for merging write values
  ...
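
The xen_vcpu_id series above ("xen: introduce xen_vcpu_id mapping" and its users)
drops the old assumption that a Linux CPU number equals the Xen vCPU number.
A minimal sketch of the idea, assuming the accessor is just a per-cpu read (the
real helper lives in the Xen headers, which are not part of this diff):

    /* Sketch: per-cpu Linux CPU -> Xen vCPU translation (helper name assumed). */
    DEFINE_PER_CPU(int, xen_vcpu_id) = -1;        /* -1 until the mapping is known */

    static inline int xen_vcpu_nr(int cpu)        /* assumed thin per_cpu() wrapper */
    {
            return per_cpu(xen_vcpu_id, cpu);
    }

    /* Hypercalls then take the translated id, e.g.:
     *   HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL);
     */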

arch/arm64/kernel/setup.c
arch/x86/include/asm/cpu.h
arch/x86/include/asm/smp.h
arch/x86/kernel/apic/apic.c
arch/x86/xen/enlighten.c
drivers/acpi/scan.c
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/firmware/efi/efi.c
kernel/sched/cputime.c

diff --combined arch/arm64/kernel/setup.c
index 5b8256770e221d2e0deda02f2e3015143035caeb,feab2eebb283017c4328c753504ac453fa6e431a..2981f1bdd07336c29f50c83a628fef43ce597db5
@@@ -202,7 -202,7 +202,7 @@@ static void __init request_standard_res
        struct resource *res;
  
        kernel_code.start   = virt_to_phys(_text);
 -      kernel_code.end     = virt_to_phys(_etext - 1);
 +      kernel_code.end     = virt_to_phys(__init_begin - 1);
        kernel_data.start   = virt_to_phys(_sdata);
        kernel_data.end     = virt_to_phys(_end - 1);
  
@@@ -257,16 -257,15 +257,17 @@@ void __init setup_arch(char **cmdline_p
         */
        cpu_uninstall_idmap();
  
+       xen_early_init();
        efi_init();
        arm64_memblock_init();
  
 +      paging_init();
 +
 +      acpi_table_upgrade();
 +
        /* Parse the ACPI tables for possible boot-time configuration */
        acpi_boot_table_init();
  
 -      paging_init();
 -
        if (acpi_disabled)
                unflatten_device_tree();
  
        else
                psci_acpi_init();
  
-       xen_early_init();
        cpu_read_bootcpu_ops();
        smp_init_cpus();
        smp_build_mpidr_hash();
diff --combined arch/x86/include/asm/cpu.h
index 59d34c521d964f294e4ad66ce6900ad16b19d999,a7fb9ddab16a3518086b3486f09f4565771001c1..9b7fa6313f1a6d28608943b9a40f0838f8b80389
@@@ -16,7 -16,9 +16,8 @@@ extern void prefill_possible_map(void)
  static inline void prefill_possible_map(void) {}
  
  #define cpu_physical_id(cpu)                  boot_cpu_physical_apicid
+ #define cpu_acpi_id(cpu)                      0
  #define safe_smp_processor_id()                       0
 -#define stack_smp_processor_id()              0
  
  #endif /* CONFIG_SMP */
  
diff --combined arch/x86/include/asm/smp.h
index c9734dc76257e3d388d303806ec8bf535f977ded,c47b42b0d2837f416df2842efc7ff939692389df..ebd0c164cd4e9033ebb42ff119f8cfe05c901f65
@@@ -33,6 -33,7 +33,7 @@@ static inline struct cpumask *cpu_llc_s
  }
  
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
+ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
  #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
@@@ -135,7 -136,6 +136,7 @@@ int native_cpu_up(unsigned int cpunum, 
  int native_cpu_disable(void);
  int common_cpu_die(unsigned int cpu);
  void native_cpu_die(unsigned int cpu);
 +void hlt_play_dead(void);
  void native_play_dead(void);
  void play_dead_common(void);
  void wbinvd_on_cpu(int cpu);
@@@ -148,6 -148,7 +149,7 @@@ void x86_idle_thread_init(unsigned int 
  void smp_store_boot_cpu_info(void);
  void smp_store_cpu_info(int id);
  #define cpu_physical_id(cpu)  per_cpu(x86_cpu_to_apicid, cpu)
+ #define cpu_acpi_id(cpu)      per_cpu(x86_cpu_to_acpiid, cpu)
  
  #else /* !CONFIG_SMP */
  #define wbinvd_on_cpu(cpu)     wbinvd()
@@@ -173,6 -174,12 +175,6 @@@ extern int safe_smp_processor_id(void)
  #elif defined(CONFIG_X86_64_SMP)
  #define raw_smp_processor_id() (this_cpu_read(cpu_number))
  
 -#define stack_smp_processor_id()                                      \
 -({                                                            \
 -      struct thread_info *ti;                                         \
 -      __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));      \
 -      ti->cpu;                                                        \
 -})
  #define safe_smp_processor_id()               smp_processor_id()
  
  #endif
diff --combined arch/x86/kernel/apic/apic.c
index f943d2f453a4eb2fda5954b5115d26f7dda64d0b,db2326fd6cfa3cc1deaba34b250f4dbd3d516eea..ac8d8ad8b0091507e2ad4dedf1475d3e42d3716e
@@@ -92,8 -92,10 +92,10 @@@ static int apic_extnmi = APIC_EXTNMI_BS
   */
  DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
  DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
+ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
  EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
  EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
  
  #ifdef CONFIG_X86_32
  
@@@ -2045,7 -2047,7 +2047,7 @@@ int generic_processor_info(int apicid, 
                int thiscpu = max + disabled_cpus - 1;
  
                pr_warning(
 -                      "ACPI: NR_CPUS/possible_cpus limit of %i almost"
 +                      "APIC: NR_CPUS/possible_cpus limit of %i almost"
                        " reached. Keeping one slot for boot cpu."
                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
  
                int thiscpu = max + disabled_cpus;
  
                pr_warning(
 -                      "ACPI: NR_CPUS/possible_cpus limit of %i reached."
 +                      "APIC: NR_CPUS/possible_cpus limit of %i reached."
                        "  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
  
                disabled_cpus++;
        if (topology_update_package_map(apicid, cpu) < 0) {
                int thiscpu = max + disabled_cpus;
  
 -              pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n",
 +              pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
                           thiscpu, apicid);
                disabled_cpus++;
                return -ENOSPC;
diff --combined arch/x86/xen/enlighten.c
index 0f87db2cc6a88fa765718a91219352832b63fc11,85ef4c0442e01aa193a104d44d7459c8f828494a..69b4b6d2973892e6095f56564977dc0cfb04944f
@@@ -59,6 -59,7 +59,7 @@@
  #include <asm/xen/pci.h>
  #include <asm/xen/hypercall.h>
  #include <asm/xen/hypervisor.h>
+ #include <asm/xen/cpuid.h>
  #include <asm/fixmap.h>
  #include <asm/processor.h>
  #include <asm/proto.h>
@@@ -118,6 -119,10 +119,10 @@@ DEFINE_PER_CPU(struct vcpu_info *, xen_
   */
  DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
  
+ /* Linux <-> Xen vCPU id mapping */
+ DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+ EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
  enum xen_domain_type xen_domain_type = XEN_NATIVE;
  EXPORT_SYMBOL_GPL(xen_domain_type);
  
@@@ -179,7 -184,7 +184,7 @@@ static void clamp_max_cpus(void
  #endif
  }
  
- static void xen_vcpu_setup(int cpu)
+ void xen_vcpu_setup(int cpu)
  {
        struct vcpu_register_vcpu_info info;
        int err;
                if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
                        return;
        }
-       if (cpu < MAX_VIRT_CPUS)
-               per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+       if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+               per_cpu(xen_vcpu, cpu) =
+                       &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
  
        if (!have_vcpu_info_placement) {
                if (cpu >= MAX_VIRT_CPUS)
           hypervisor has no unregister variant and this hypercall does not
           allow to over-write info.mfn and info.offset.
         */
-       err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
+       err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
+                                &info);
  
        if (err) {
                printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
@@@ -247,10 -254,11 +254,11 @@@ void xen_vcpu_restore(void
  
        for_each_possible_cpu(cpu) {
                bool other_cpu = (cpu != smp_processor_id());
-               bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
+               bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
+                                               NULL);
  
                if (other_cpu && is_up &&
-                   HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
+                   HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
                        BUG();
  
                xen_setup_runstate_info(cpu);
                        xen_vcpu_setup(cpu);
  
                if (other_cpu && is_up &&
-                   HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
+                   HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
                        BUG();
        }
  }
@@@ -521,7 -529,9 +529,7 @@@ static void set_aliased_prot(void *v, p
  
        preempt_disable();
  
 -      pagefault_disable();    /* Avoid warnings due to being atomic. */
 -      __get_user(dummy, (unsigned char __user __force *)v);
 -      pagefault_enable();
 +      probe_kernel_read(&dummy, v, 1);
  
        if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                BUG();
@@@ -588,7 -598,7 +596,7 @@@ static void xen_load_gdt(const struct d
  {
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
-       unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+       unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
        unsigned long frames[pages];
        int f;
  
@@@ -637,7 -647,7 +645,7 @@@ static void __init xen_load_gdt_boot(co
  {
        unsigned long va = dtr->address;
        unsigned int size = dtr->size + 1;
-       unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+       unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
        unsigned long frames[pages];
        int f;
  
@@@ -1135,8 -1145,11 +1143,11 @@@ void xen_setup_vcpu_info_placement(void
  {
        int cpu;
  
-       for_each_possible_cpu(cpu)
+       for_each_possible_cpu(cpu) {
+               /* Set up direct vCPU id mapping for PV guests. */
+               per_cpu(xen_vcpu_id, cpu) = cpu;
                xen_vcpu_setup(cpu);
+       }
  
        /* xen_vcpu_setup managed to place the vcpu_info within the
         * percpu area for all cpus, so make use of it. Note that for
@@@ -1727,6 -1740,9 +1738,9 @@@ asmlinkage __visible void __init xen_st
  #endif
        xen_raw_console_write("about to get started...\n");
  
+       /* Let's presume PV guests always boot on vCPU with id 0. */
+       per_cpu(xen_vcpu_id, 0) = 0;
        xen_setup_runstate_info(0);
  
        xen_efi_init();
@@@ -1768,9 -1784,10 +1782,10 @@@ void __ref xen_hvm_init_shared_info(voi
         * in that case multiple vcpus might be online. */
        for_each_online_cpu(cpu) {
                /* Leave it to be NULL. */
-               if (cpu >= MAX_VIRT_CPUS)
+               if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
                        continue;
-               per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+               per_cpu(xen_vcpu, cpu) =
+                       &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
        }
  }
  
@@@ -1795,6 -1812,12 +1810,12 @@@ static void __init init_hvm_pv_info(voi
  
        xen_setup_features();
  
+       cpuid(base + 4, &eax, &ebx, &ecx, &edx);
+       if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
+               this_cpu_write(xen_vcpu_id, ebx);
+       else
+               this_cpu_write(xen_vcpu_id, smp_processor_id());
        pv_info.name = "Xen HVM";
  
        xen_domain_type = XEN_HVM_DOMAIN;
@@@ -1806,6 -1829,10 +1827,10 @@@ static int xen_hvm_cpu_notify(struct no
        int cpu = (long)hcpu;
        switch (action) {
        case CPU_UP_PREPARE:
+               if (cpu_acpi_id(cpu) != U32_MAX)
+                       per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
+               else
+                       per_cpu(xen_vcpu_id, cpu) = cpu;
                xen_vcpu_setup(cpu);
                if (xen_have_vector_callback) {
                        if (xen_feature(XENFEAT_hvm_safe_pvclock))
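
For HVM guests the mapping above is filled from two sources: the boot CPU asks
Xen directly via the CPUID leaf shown in init_hvm_pv_info(), and secondary CPUs
fall back to the ACPI processor id recorded from MADT. Condensed from the hunks
above (leaf number and flag exactly as used there):

    /* Boot CPU (init_hvm_pv_info): read the vCPU id from Xen's CPUID leaf. */
    cpuid(base + 4, &eax, &ebx, &ecx, &edx);
    if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
            this_cpu_write(xen_vcpu_id, ebx);                  /* id reported by Xen */
    else
            this_cpu_write(xen_vcpu_id, smp_processor_id());   /* old 1:1 assumption */

    /* Secondary CPUs (CPU_UP_PREPARE): prefer the ACPI id gathered from MADT. */
    per_cpu(xen_vcpu_id, cpu) = (cpu_acpi_id(cpu) != U32_MAX) ? cpu_acpi_id(cpu) : cpu;
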
diff --combined drivers/acpi/scan.c
index 405056b95b05b34a29b9d90c283032d11614047a,cfc73fecaba437a1b913d8f83d4a8b9b957c6fdf..ad9fc84a8601206cec6cc1fb15cb36e5c23250cb
@@@ -46,6 -46,13 +46,13 @@@ DEFINE_MUTEX(acpi_device_lock)
  LIST_HEAD(acpi_wakeup_device_list);
  static DEFINE_MUTEX(acpi_hp_context_lock);
  
+ /*
+  * The UART device described by the SPCR table is the only object which needs
+  * special-casing. Everything else is covered by ACPI namespace paths in STAO
+  * table.
+  */
+ static u64 spcr_uart_addr;
  struct acpi_dep_data {
        struct list_head node;
        acpi_handle master;
@@@ -494,8 -501,6 +501,8 @@@ static void acpi_device_del(struct acpi
        device_del(&device->dev);
  }
  
 +static BLOCKING_NOTIFIER_HEAD(acpi_reconfig_chain);
 +
  static LIST_HEAD(acpi_device_del_list);
  static DEFINE_MUTEX(acpi_device_del_lock);
  
@@@ -516,9 -521,6 +523,9 @@@ static void acpi_device_del_work_fn(str
  
                mutex_unlock(&acpi_device_del_lock);
  
 +              blocking_notifier_call_chain(&acpi_reconfig_chain,
 +                                           ACPI_RECONFIG_DEVICE_REMOVE, adev);
 +
                acpi_device_del(adev);
                /*
                 * Drop references to all power resources that might have been
@@@ -1411,7 -1413,7 +1418,7 @@@ void acpi_init_device_object(struct acp
        acpi_bus_get_flags(device);
        device->flags.match_driver = false;
        device->flags.initialized = true;
 -      device->flags.visited = false;
 +      acpi_device_clear_enumerated(device);
        device_initialize(&device->dev);
        dev_set_uevent_suppress(&device->dev, true);
        acpi_init_coherency(device);
@@@ -1458,6 -1460,41 +1465,41 @@@ static int acpi_add_single_object(struc
        return 0;
  }
  
+ static acpi_status acpi_get_resource_memory(struct acpi_resource *ares,
+                                           void *context)
+ {
+       struct resource *res = context;
+       if (acpi_dev_resource_memory(ares, res))
+               return AE_CTRL_TERMINATE;
+       return AE_OK;
+ }
+ static bool acpi_device_should_be_hidden(acpi_handle handle)
+ {
+       acpi_status status;
+       struct resource res;
+       /* Check if it should ignore the UART device */
+       if (!(spcr_uart_addr && acpi_has_method(handle, METHOD_NAME__CRS)))
+               return false;
+       /*
+        * The UART device described in SPCR table is assumed to have only one
+        * memory resource present. So we only look for the first one here.
+        */
+       status = acpi_walk_resources(handle, METHOD_NAME__CRS,
+                                    acpi_get_resource_memory, &res);
+       if (ACPI_FAILURE(status) || res.start != spcr_uart_addr)
+               return false;
+       acpi_handle_info(handle, "The UART device @%pa in SPCR table will be hidden\n",
+                        &res.start);
+       return true;
+ }
  static int acpi_bus_type_and_status(acpi_handle handle, int *type,
                                    unsigned long long *sta)
  {
        switch (acpi_type) {
        case ACPI_TYPE_ANY:             /* for ACPI_ROOT_OBJECT */
        case ACPI_TYPE_DEVICE:
+               if (acpi_device_should_be_hidden(handle))
+                       return -ENODEV;
                *type = ACPI_BUS_TYPE_DEVICE;
                status = acpi_bus_get_status_handle(handle, sta);
                if (ACPI_FAILURE(status))
@@@ -1681,20 -1721,15 +1726,20 @@@ static void acpi_default_enumeration(st
        bool is_spi_i2c_slave = false;
  
        /*
 -       * Do not enemerate SPI/I2C slaves as they will be enuerated by their
 +       * Do not enumerate SPI/I2C slaves as they will be enumerated by their
         * respective parents.
         */
        INIT_LIST_HEAD(&resource_list);
        acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
                               &is_spi_i2c_slave);
        acpi_dev_free_resource_list(&resource_list);
 -      if (!is_spi_i2c_slave)
 +      if (!is_spi_i2c_slave) {
                acpi_create_platform_device(device);
 +              acpi_device_set_enumerated(device);
 +      } else {
 +              blocking_notifier_call_chain(&acpi_reconfig_chain,
 +                                           ACPI_RECONFIG_DEVICE_ADD, device);
 +      }
  }
  
  static const struct acpi_device_id generic_device_ids[] = {
@@@ -1761,7 -1796,7 +1806,7 @@@ static void acpi_bus_attach(struct acpi
        acpi_bus_get_status(device);
        /* Skip devices that are not present. */
        if (!acpi_device_is_present(device)) {
 -              device->flags.visited = false;
 +              acpi_device_clear_enumerated(device);
                device->flags.power_manageable = 0;
                return;
        }
  
                device->flags.initialized = true;
        }
 -      device->flags.visited = false;
 +
        ret = acpi_scan_attach_handler(device);
        if (ret < 0)
                return;
                if (!ret && device->pnp.type.platform_id)
                        acpi_default_enumeration(device);
        }
 -      device->flags.visited = true;
  
   ok:
        list_for_each_entry(child, &device->children, node)
@@@ -1881,7 -1917,7 +1926,7 @@@ void acpi_bus_trim(struct acpi_device *
         */
        acpi_device_set_power(adev, ACPI_STATE_D3_COLD);
        adev->flags.initialized = false;
 -      adev->flags.visited = false;
 +      acpi_device_clear_enumerated(adev);
  }
  EXPORT_SYMBOL_GPL(acpi_bus_trim);
  
@@@ -1925,11 -1961,24 +1970,26 @@@ static int acpi_bus_scan_fixed(void
        return result < 0 ? result : 0;
  }
  
+ static void __init acpi_get_spcr_uart_addr(void)
+ {
+       acpi_status status;
+       struct acpi_table_spcr *spcr_ptr;
+       status = acpi_get_table(ACPI_SIG_SPCR, 0,
+                               (struct acpi_table_header **)&spcr_ptr);
+       if (ACPI_SUCCESS(status))
+               spcr_uart_addr = spcr_ptr->serial_port.address;
+       else
+               printk(KERN_WARNING PREFIX "STAO table present, but SPCR is missing\n");
+ }
 +static bool acpi_scan_initialized;
 +
  int __init acpi_scan_init(void)
  {
        int result;
+       acpi_status status;
+       struct acpi_table_stao *stao_ptr;
  
        acpi_pci_root_init();
        acpi_pci_link_init();
  
        acpi_scan_add_handler(&generic_device_handler);
  
+       /*
+        * If there is STAO table, check whether it needs to ignore the UART
+        * device in SPCR table.
+        */
+       status = acpi_get_table(ACPI_SIG_STAO, 0,
+                               (struct acpi_table_header **)&stao_ptr);
+       if (ACPI_SUCCESS(status)) {
+               if (stao_ptr->header.length > sizeof(struct acpi_table_stao))
+                       printk(KERN_INFO PREFIX "STAO Name List not yet supported.");
+               if (stao_ptr->ignore_uart)
+                       acpi_get_spcr_uart_addr();
+       }
        mutex_lock(&acpi_scan_lock);
        /*
         * Enumerate devices in the ACPI namespace.
  
        acpi_update_all_gpes();
  
 +      acpi_scan_initialized = true;
 +
   out:
        mutex_unlock(&acpi_scan_lock);
        return result;
@@@ -2016,57 -2077,3 +2090,57 @@@ int __init __acpi_probe_device_table(st
  
        return count;
  }
 +
 +struct acpi_table_events_work {
 +      struct work_struct work;
 +      void *table;
 +      u32 event;
 +};
 +
 +static void acpi_table_events_fn(struct work_struct *work)
 +{
 +      struct acpi_table_events_work *tew;
 +
 +      tew = container_of(work, struct acpi_table_events_work, work);
 +
 +      if (tew->event == ACPI_TABLE_EVENT_LOAD) {
 +              acpi_scan_lock_acquire();
 +              acpi_bus_scan(ACPI_ROOT_OBJECT);
 +              acpi_scan_lock_release();
 +      }
 +
 +      kfree(tew);
 +}
 +
 +void acpi_scan_table_handler(u32 event, void *table, void *context)
 +{
 +      struct acpi_table_events_work *tew;
 +
 +      if (!acpi_scan_initialized)
 +              return;
 +
 +      if (event != ACPI_TABLE_EVENT_LOAD)
 +              return;
 +
 +      tew = kmalloc(sizeof(*tew), GFP_KERNEL);
 +      if (!tew)
 +              return;
 +
 +      INIT_WORK(&tew->work, acpi_table_events_fn);
 +      tew->table = table;
 +      tew->event = event;
 +
 +      schedule_work(&tew->work);
 +}
 +
 +int acpi_reconfig_notifier_register(struct notifier_block *nb)
 +{
 +      return blocking_notifier_chain_register(&acpi_reconfig_chain, nb);
 +}
 +EXPORT_SYMBOL(acpi_reconfig_notifier_register);
 +
 +int acpi_reconfig_notifier_unregister(struct notifier_block *nb)
 +{
 +      return blocking_notifier_chain_unregister(&acpi_reconfig_chain, nb);
 +}
 +EXPORT_SYMBOL(acpi_reconfig_notifier_unregister);
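
The new acpi_reconfig notifier chain added above lets interested code react when
devices are enumerated from (or removed along with) dynamically loaded tables.
A hypothetical consumer, with the callback and variable names invented for
illustration:

    /* Hypothetical consumer of the new notifier chain (names are illustrative). */
    static int my_acpi_reconfig_cb(struct notifier_block *nb,
                                   unsigned long event, void *data)
    {
            struct acpi_device *adev = data;

            if (event == ACPI_RECONFIG_DEVICE_ADD)
                    dev_info(&adev->dev, "enumerated from a runtime-loaded table\n");
            return NOTIFY_OK;
    }

    static struct notifier_block my_acpi_nb = {
            .notifier_call = my_acpi_reconfig_cb,
    };

    /* Register with acpi_reconfig_notifier_register(&my_acpi_nb) on init,
     * and acpi_reconfig_notifier_unregister(&my_acpi_nb) on teardown. */
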
diff --combined drivers/block/xen-blkback/xenbus.c
index 2994cfa44c8aad35bc245c951101376e49eaf911,4a2412127d8f6a18d00c7892642a0bae36ae3c9b..3cc6d1d86f1efc038f451dab3bb3452bed7de92b
@@@ -379,7 -379,7 +379,7 @@@ static struct attribute *xen_vbdstat_at
        NULL
  };
  
- static struct attribute_group xen_vbdstat_group = {
+ static const struct attribute_group xen_vbdstat_group = {
        .name = "statistics",
        .attrs = xen_vbdstat_attrs,
  };
@@@ -480,7 -480,7 +480,7 @@@ static int xen_vbd_create(struct xen_bl
        if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
                vbd->flush_support = true;
  
 -      if (q && blk_queue_secdiscard(q))
 +      if (q && blk_queue_secure_erase(q))
                vbd->discard_secure = true;
  
        pr_debug("Successful creation of handle=%04x (dom=%u)\n",
@@@ -715,8 -715,11 +715,11 @@@ static void backend_changed(struct xenb
  
        /* Front end dir is a number, which is used as the handle. */
        err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
-       if (err)
+       if (err) {
+               kfree(be->mode);
+               be->mode = NULL;
                return;
+       }
  
        be->major = major;
        be->minor = minor;
@@@ -1022,9 -1025,9 +1025,9 @@@ static int connect_ring(struct backend_
        pr_debug("%s %s\n", __func__, dev->otherend);
  
        be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
-       err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
-                           "%63s", protocol, NULL);
-       if (err)
+       err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
+                          "%63s", protocol);
+       if (err <= 0)
                strcpy(protocol, "unspecified, assuming default");
        else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
                be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
                xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                return -ENOSYS;
        }
-       err = xenbus_gather(XBT_NIL, dev->otherend,
-                           "feature-persistent", "%u",
-                           &pers_grants, NULL);
-       if (err)
+       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                          "feature-persistent", "%u", &pers_grants);
+       if (err <= 0)
                pers_grants = 0;
  
        be->blkif->vbd.feature_gnt_persistent = pers_grants;
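
The xenbus_scanf() conversions above also change the error convention:
xenbus_gather() returns 0 on success, whereas xenbus_scanf() follows scanf
semantics and returns the number of values parsed (or a negative error), hence
the new "err <= 0" checks. The resulting pattern for an optional integer key:

    /* Reading a single optional integer key, as in the hunk above. */
    unsigned int pers_grants;
    int err;

    err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-persistent",
                       "%u", &pers_grants);
    if (err <= 0)           /* key missing or unparsable: fall back to the default */
            pers_grants = 0;
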
diff --combined drivers/block/xen-blkfront.c
index 0b6682a33e3b836e35a70aa5ba445f8083017f8f,ca0536eb70373c846100aa1de242caf5f48971d5..be4fea6a5dd33695df30f87a1fea5341eadbd709
@@@ -196,7 -196,6 +196,7 @@@ struct blkfront_inf
        unsigned int nr_ring_pages;
        struct request_queue *rq;
        unsigned int feature_flush;
 +      unsigned int feature_fua;
        unsigned int feature_discard:1;
        unsigned int feature_secdiscard:1;
        unsigned int discard_granularity;
        struct blk_mq_tag_set tag_set;
        struct blkfront_ring_info *rinfo;
        unsigned int nr_rings;
 +      /* Save uncomplete reqs and bios for migration. */
 +      struct list_head requests;
 +      struct bio_list bio_list;
  };
  
  static unsigned int nr_minors;
@@@ -548,7 -544,7 +548,7 @@@ static int blkif_queue_discard_req(stru
        ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
        ring_req->u.discard.id = id;
        ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
 -      if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
 +      if (req_op(req) == REQ_OP_SECURE_ERASE && info->feature_secdiscard)
                ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
        else
                ring_req->u.discard.flag = 0;
@@@ -747,7 -743,7 +747,7 @@@ static int blkif_queue_rw_req(struct re
                 * The indirect operation can only be a BLKIF_OP_READ or
                 * BLKIF_OP_WRITE
                 */
 -              BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
 +              BUG_ON(req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA);
                ring_req->operation = BLKIF_OP_INDIRECT;
                ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
                        BLKIF_OP_WRITE : BLKIF_OP_READ;
                ring_req->u.rw.handle = info->handle;
                ring_req->operation = rq_data_dir(req) ?
                        BLKIF_OP_WRITE : BLKIF_OP_READ;
 -              if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
 +              if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
                        /*
                         * Ideally we can do an unordered flush-to-disk.
                         * In case the backend onlysupports barriers, use that.
                         * implement it the same way.  (It's also a FLUSH+FUA,
                         * since it is guaranteed ordered WRT previous writes.)
                         */
 -                      switch (info->feature_flush &
 -                              ((REQ_FLUSH|REQ_FUA))) {
 -                      case REQ_FLUSH|REQ_FUA:
 +                      if (info->feature_flush && info->feature_fua)
                                ring_req->operation =
                                        BLKIF_OP_WRITE_BARRIER;
 -                              break;
 -                      case REQ_FLUSH:
 +                      else if (info->feature_flush)
                                ring_req->operation =
                                        BLKIF_OP_FLUSH_DISKCACHE;
 -                              break;
 -                      default:
 +                      else
                                ring_req->operation = 0;
 -                      }
                }
                ring_req->u.rw.nr_segments = num_grant;
                if (unlikely(require_extra_req)) {
@@@ -843,8 -844,7 +843,8 @@@ static int blkif_queue_request(struct r
        if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED))
                return 1;
  
 -      if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
 +      if (unlikely(req_op(req) == REQ_OP_DISCARD ||
 +                   req_op(req) == REQ_OP_SECURE_ERASE))
                return blkif_queue_discard_req(req, rinfo);
        else
                return blkif_queue_rw_req(req, rinfo);
@@@ -864,10 -864,10 +864,10 @@@ static inline bool blkif_request_flush_
                                               struct blkfront_info *info)
  {
        return ((req->cmd_type != REQ_TYPE_FS) ||
 -              ((req->cmd_flags & REQ_FLUSH) &&
 -               !(info->feature_flush & REQ_FLUSH)) ||
 +              ((req_op(req) == REQ_OP_FLUSH) &&
 +               !info->feature_flush) ||
                ((req->cmd_flags & REQ_FUA) &&
 -               !(info->feature_flush & REQ_FUA)));
 +               !info->feature_fua));
  }
  
  static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
@@@ -952,7 -952,7 +952,7 @@@ static int xlvbd_init_blk_queue(struct 
                rq->limits.discard_granularity = info->discard_granularity;
                rq->limits.discard_alignment = info->discard_alignment;
                if (info->feature_secdiscard)
 -                      queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
 +                      queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
        }
  
        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        return 0;
  }
  
 -static const char *flush_info(unsigned int feature_flush)
 +static const char *flush_info(struct blkfront_info *info)
  {
 -      switch (feature_flush & ((REQ_FLUSH | REQ_FUA))) {
 -      case REQ_FLUSH|REQ_FUA:
 +      if (info->feature_flush && info->feature_fua)
                return "barrier: enabled;";
 -      case REQ_FLUSH:
 +      else if (info->feature_flush)
                return "flush diskcache: enabled;";
 -      default:
 +      else
                return "barrier or flush: disabled;";
 -      }
  }
  
  static void xlvbd_flush(struct blkfront_info *info)
  {
 -      blk_queue_write_cache(info->rq, info->feature_flush & REQ_FLUSH,
 -                              info->feature_flush & REQ_FUA);
 +      blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
 +                            info->feature_fua ? true : false);
        pr_info("blkfront: %s: %s %s %s %s %s\n",
 -              info->gd->disk_name, flush_info(info->feature_flush),
 +              info->gd->disk_name, flush_info(info),
                "persistent grants:", info->feature_persistent ?
                "enabled;" : "disabled;", "indirect descriptors:",
                info->max_indirect_segments ? "enabled;" : "disabled;");
@@@ -1134,6 -1136,7 +1134,6 @@@ static int xlvbd_alloc_gendisk(blkif_se
        gd->first_minor = minor;
        gd->fops = &xlvbd_block_fops;
        gd->private_data = info;
 -      gd->driverfs_dev = &(info->xbdev->dev);
        set_capacity(gd, capacity);
  
        if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
@@@ -1591,7 -1594,7 +1591,7 @@@ static irqreturn_t blkif_interrupt(int 
                                info->feature_discard = 0;
                                info->feature_secdiscard = 0;
                                queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
 -                              queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
 +                              queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
                        }
                        blk_mq_complete_request(req, error);
                        break;
                        if (unlikely(error)) {
                                if (error == -EOPNOTSUPP)
                                        error = 0;
 +                              info->feature_fua = 0;
                                info->feature_flush = 0;
                                xlvbd_flush(info);
                        }
@@@ -2000,22 -2002,69 +2000,22 @@@ static int blkif_recover(struct blkfron
  {
        unsigned int i, r_index;
        struct request *req, *n;
 -      struct blk_shadow *copy;
        int rc;
        struct bio *bio, *cloned_bio;
 -      struct bio_list bio_list, merge_bio;
        unsigned int segs, offset;
        int pending, size;
        struct split_bio *split_bio;
 -      struct list_head requests;
  
        blkfront_gather_backend_features(info);
        segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
        blk_queue_max_segments(info->rq, segs);
 -      bio_list_init(&bio_list);
 -      INIT_LIST_HEAD(&requests);
  
        for (r_index = 0; r_index < info->nr_rings; r_index++) {
 -              struct blkfront_ring_info *rinfo;
 -
 -              rinfo = &info->rinfo[r_index];
 -              /* Stage 1: Make a safe copy of the shadow state. */
 -              copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
 -                             GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
 -              if (!copy)
 -                      return -ENOMEM;
 -
 -              /* Stage 2: Set up free list. */
 -              memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
 -              for (i = 0; i < BLK_RING_SIZE(info); i++)
 -                      rinfo->shadow[i].req.u.rw.id = i+1;
 -              rinfo->shadow_free = rinfo->ring.req_prod_pvt;
 -              rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
 +              struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
  
                rc = blkfront_setup_indirect(rinfo);
 -              if (rc) {
 -                      kfree(copy);
 +              if (rc)
                        return rc;
 -              }
 -
 -              for (i = 0; i < BLK_RING_SIZE(info); i++) {
 -                      /* Not in use? */
 -                      if (!copy[i].request)
 -                              continue;
 -
 -                      /*
 -                       * Get the bios in the request so we can re-queue them.
 -                       */
 -                      if (copy[i].request->cmd_flags &
 -                          (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
 -                              /*
 -                               * Flush operations don't contain bios, so
 -                               * we need to requeue the whole request
 -                               */
 -                              list_add(&copy[i].request->queuelist, &requests);
 -                              continue;
 -                      }
 -                      merge_bio.head = copy[i].request->bio;
 -                      merge_bio.tail = copy[i].request->biotail;
 -                      bio_list_merge(&bio_list, &merge_bio);
 -                      copy[i].request->bio = NULL;
 -                      blk_end_request_all(copy[i].request, 0);
 -              }
 -
 -              kfree(copy);
        }
        xenbus_switch_state(info->xbdev, XenbusStateConnected);
  
                kick_pending_request_queues(rinfo);
        }
  
 -      list_for_each_entry_safe(req, n, &requests, queuelist) {
 +      list_for_each_entry_safe(req, n, &info->requests, queuelist) {
                /* Requeue pending requests (flush or discard) */
                list_del_init(&req->queuelist);
                BUG_ON(req->nr_phys_segments > segs);
        }
        blk_mq_kick_requeue_list(info->rq);
  
 -      while ((bio = bio_list_pop(&bio_list)) != NULL) {
 +      while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
                /* Traverse the list of pending bios and re-queue them */
                if (bio_segments(bio) > segs) {
                        /*
                                bio_trim(cloned_bio, offset, size);
                                cloned_bio->bi_private = split_bio;
                                cloned_bio->bi_end_io = split_bio_end;
 -                              submit_bio(cloned_bio->bi_rw, cloned_bio);
 +                              submit_bio(cloned_bio);
                        }
                        /*
                         * Now we have to wait for all those smaller bios to
                        continue;
                }
                /* We don't need to split this bio */
 -              submit_bio(bio->bi_rw, bio);
 +              submit_bio(bio);
        }
  
        return 0;
@@@ -2084,47 -2133,9 +2084,47 @@@ static int blkfront_resume(struct xenbu
  {
        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
        int err = 0;
 +      unsigned int i, j;
  
        dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
  
 +      bio_list_init(&info->bio_list);
 +      INIT_LIST_HEAD(&info->requests);
 +      for (i = 0; i < info->nr_rings; i++) {
 +              struct blkfront_ring_info *rinfo = &info->rinfo[i];
 +              struct bio_list merge_bio;
 +              struct blk_shadow *shadow = rinfo->shadow;
 +
 +              for (j = 0; j < BLK_RING_SIZE(info); j++) {
 +                      /* Not in use? */
 +                      if (!shadow[j].request)
 +                              continue;
 +
 +                      /*
 +                       * Get the bios in the request so we can re-queue them.
 +                       */
 +                      if (req_op(shadow[i].request) == REQ_OP_FLUSH ||
 +                          req_op(shadow[i].request) == REQ_OP_DISCARD ||
 +                          req_op(shadow[i].request) == REQ_OP_SECURE_ERASE ||
 +                          shadow[j].request->cmd_flags & REQ_FUA) {
 +                              /*
 +                               * Flush operations don't contain bios, so
 +                               * we need to requeue the whole request
 +                               *
 +                               * XXX: but this doesn't make any sense for a
 +                               * write with the FUA flag set..
 +                               */
 +                              list_add(&shadow[j].request->queuelist, &info->requests);
 +                              continue;
 +                      }
 +                      merge_bio.head = shadow[j].request->bio;
 +                      merge_bio.tail = shadow[j].request->biotail;
 +                      bio_list_merge(&info->bio_list, &merge_bio);
 +                      shadow[j].request->bio = NULL;
 +                      blk_mq_end_request(shadow[j].request, 0);
 +              }
 +      }
 +
        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
  
        err = negotiate_mq(info);
@@@ -2197,10 -2208,9 +2197,9 @@@ static void blkfront_setup_discard(stru
                info->discard_granularity = discard_granularity;
                info->discard_alignment = discard_alignment;
        }
-       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                   "discard-secure", "%d", &discard_secure,
-                   NULL);
-       if (!err)
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "discard-secure", "%u", &discard_secure);
+       if (err > 0)
                info->feature_secdiscard = !!discard_secure;
  }
  
@@@ -2298,11 -2308,9 +2297,10 @@@ static void blkfront_gather_backend_fea
        unsigned int indirect_segments;
  
        info->feature_flush = 0;
 +      info->feature_fua = 0;
  
-       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                       "feature-barrier", "%d", &barrier,
-                       NULL);
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "feature-barrier", "%d", &barrier);
  
        /*
         * If there's no "feature-barrier" defined, then it means
         *
         * If there are barriers, then we use flush.
         */
-       if (!err && barrier) {
 -      if (err > 0 && barrier)
 -              info->feature_flush = REQ_FLUSH | REQ_FUA;
++      if (err > 0 && barrier) {
 +              info->feature_flush = 1;
 +              info->feature_fua = 1;
 +      }
 +
        /*
         * And if there is "feature-flush-cache" use that above
         * barriers.
         */
-       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                       "feature-flush-cache", "%d", &flush,
-                       NULL);
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "feature-flush-cache", "%d", &flush);
  
-       if (!err && flush) {
 -      if (err > 0 && flush)
 -              info->feature_flush = REQ_FLUSH;
++      if (err > 0 && flush) {
 +              info->feature_flush = 1;
 +              info->feature_fua = 0;
 +      }
  
-       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                       "feature-discard", "%d", &discard,
-                       NULL);
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "feature-discard", "%d", &discard);
  
-       if (!err && discard)
+       if (err > 0 && discard)
                blkfront_setup_discard(info);
  
-       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                       "feature-persistent", "%u", &persistent,
-                       NULL);
-       if (err)
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "feature-persistent", "%d", &persistent);
+       if (err <= 0)
                info->feature_persistent = 0;
        else
                info->feature_persistent = persistent;
  
-       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-                           "feature-max-indirect-segments", "%u", &indirect_segments,
-                           NULL);
-       if (err)
+       err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+                          "feature-max-indirect-segments", "%u",
+                          &indirect_segments);
+       if (err <= 0)
                info->max_indirect_segments = 0;
        else
                info->max_indirect_segments = min(indirect_segments,
@@@ -2447,7 -2447,7 +2442,7 @@@ static void blkfront_connect(struct blk
        for (i = 0; i < info->nr_rings; i++)
                kick_pending_request_queues(&info->rinfo[i]);
  
 -      add_disk(info->gd);
 +      device_add_disk(&info->xbdev->dev, info->gd);
  
        info->is_ready = 1;
  }
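
Most of the churn in blkfront above comes from the 4.8 request-op split: the
operation (read/write/flush/discard/secure erase) is now read with req_op(req),
while modifier bits such as REQ_FUA remain in cmd_flags, and flush/FUA support
is tracked as two separate feature flags instead of a REQ_FLUSH|REQ_FUA mask.
Minimal helpers in that style (a sketch, not part of the driver):

    /* Sketch: classifying a request under the op/flags split used above. */
    static bool blkif_req_is_discard_like(struct request *req)
    {
            return req_op(req) == REQ_OP_DISCARD ||
                   req_op(req) == REQ_OP_SECURE_ERASE;
    }

    static bool blkif_req_wants_flush(struct request *req)
    {
            return req_op(req) == REQ_OP_FLUSH ||
                   (req->cmd_flags & REQ_FUA);
    }
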
diff --combined drivers/firmware/efi/efi.c
index 8730fd475bf3d78d658bd979d2122af3f8c7ddc3,1c6f9dda3c3e616344486a74f6efa962d75abe17..5a2631af7410782dc8f7ba993ab0b1139bf71abb
@@@ -24,9 -24,6 +24,9 @@@
  #include <linux/of_fdt.h>
  #include <linux/io.h>
  #include <linux/platform_device.h>
 +#include <linux/slab.h>
 +#include <linux/acpi.h>
 +#include <linux/ucs2_string.h>
  
  #include <asm/early_ioremap.h>
  
@@@ -198,96 -195,6 +198,96 @@@ static void generic_ops_unregister(void
        efivars_unregister(&generic_efivars);
  }
  
 +#if IS_ENABLED(CONFIG_ACPI)
 +#define EFIVAR_SSDT_NAME_MAX  16
 +static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
 +static int __init efivar_ssdt_setup(char *str)
 +{
 +      if (strlen(str) < sizeof(efivar_ssdt))
 +              memcpy(efivar_ssdt, str, strlen(str));
 +      else
 +              pr_warn("efivar_ssdt: name too long: %s\n", str);
 +      return 0;
 +}
 +__setup("efivar_ssdt=", efivar_ssdt_setup);
 +
 +static __init int efivar_ssdt_iter(efi_char16_t *name, efi_guid_t vendor,
 +                                 unsigned long name_size, void *data)
 +{
 +      struct efivar_entry *entry;
 +      struct list_head *list = data;
 +      char utf8_name[EFIVAR_SSDT_NAME_MAX];
 +      int limit = min_t(unsigned long, EFIVAR_SSDT_NAME_MAX, name_size);
 +
 +      ucs2_as_utf8(utf8_name, name, limit - 1);
 +      if (strncmp(utf8_name, efivar_ssdt, limit) != 0)
 +              return 0;
 +
 +      entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 +      if (!entry)
 +              return 0;
 +
 +      memcpy(entry->var.VariableName, name, name_size);
 +      memcpy(&entry->var.VendorGuid, &vendor, sizeof(efi_guid_t));
 +
 +      efivar_entry_add(entry, list);
 +
 +      return 0;
 +}
 +
 +static __init int efivar_ssdt_load(void)
 +{
 +      LIST_HEAD(entries);
 +      struct efivar_entry *entry, *aux;
 +      unsigned long size;
 +      void *data;
 +      int ret;
 +
 +      ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
 +
 +      list_for_each_entry_safe(entry, aux, &entries, list) {
 +              pr_info("loading SSDT from variable %s-%pUl\n", efivar_ssdt,
 +                      &entry->var.VendorGuid);
 +
 +              list_del(&entry->list);
 +
 +              ret = efivar_entry_size(entry, &size);
 +              if (ret) {
 +                      pr_err("failed to get var size\n");
 +                      goto free_entry;
 +              }
 +
 +              data = kmalloc(size, GFP_KERNEL);
 +              if (!data)
 +                      goto free_entry;
 +
 +              ret = efivar_entry_get(entry, NULL, &size, data);
 +              if (ret) {
 +                      pr_err("failed to get var data\n");
 +                      goto free_data;
 +              }
 +
 +              ret = acpi_load_table(data);
 +              if (ret) {
 +                      pr_err("failed to load table: %d\n", ret);
 +                      goto free_data;
 +              }
 +
 +              goto free_entry;
 +
 +free_data:
 +              kfree(data);
 +
 +free_entry:
 +              kfree(entry);
 +      }
 +
 +      return ret;
 +}
 +#else
 +static inline int efivar_ssdt_load(void) { return 0; }
 +#endif
 +
  /*
   * We register the efi subsystem with the firmware subsystem and the
   * efivars subsystem with the efi subsystem, if the system was booted with
@@@ -311,9 -218,6 +311,9 @@@ static int __init efisubsys_init(void
        if (error)
                goto err_put;
  
 +      if (efi_enabled(EFI_RUNTIME_SERVICES))
 +              efivar_ssdt_load();
 +
        error = sysfs_create_group(efi_kobj, &efi_subsys_attr_group);
        if (error) {
                pr_err("efi: Sysfs attribute export failed with error %d.\n",
@@@ -568,12 -472,14 +568,14 @@@ device_initcall(efi_load_efivars)
                FIELD_SIZEOF(struct efi_fdt_params, field) \
        }
  
- static __initdata struct {
+ struct params {
        const char name[32];
        const char propname[32];
        int offset;
        int size;
- } dt_params[] = {
+ };
+ static __initdata struct params fdt_params[] = {
        UEFI_PARAM("System Table", "linux,uefi-system-table", system_table),
        UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap),
        UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size),
        UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver)
  };
  
+ static __initdata struct params xen_fdt_params[] = {
+       UEFI_PARAM("System Table", "xen,uefi-system-table", system_table),
+       UEFI_PARAM("MemMap Address", "xen,uefi-mmap-start", mmap),
+       UEFI_PARAM("MemMap Size", "xen,uefi-mmap-size", mmap_size),
+       UEFI_PARAM("MemMap Desc. Size", "xen,uefi-mmap-desc-size", desc_size),
+       UEFI_PARAM("MemMap Desc. Version", "xen,uefi-mmap-desc-ver", desc_ver)
+ };
+ #define EFI_FDT_PARAMS_SIZE   ARRAY_SIZE(fdt_params)
+ static __initdata struct {
+       const char *uname;
+       const char *subnode;
+       struct params *params;
+ } dt_params[] = {
+       { "hypervisor", "uefi", xen_fdt_params },
+       { "chosen", NULL, fdt_params },
+ };
  struct param_info {
        int found;
        void *params;
+       const char *missing;
  };
  
- static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
-                                      int depth, void *data)
+ static int __init __find_uefi_params(unsigned long node,
+                                    struct param_info *info,
+                                    struct params *params)
  {
-       struct param_info *info = data;
        const void *prop;
        void *dest;
        u64 val;
        int i, len;
  
-       if (depth != 1 || strcmp(uname, "chosen") != 0)
-               return 0;
-       for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
-               prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len);
-               if (!prop)
+       for (i = 0; i < EFI_FDT_PARAMS_SIZE; i++) {
+               prop = of_get_flat_dt_prop(node, params[i].propname, &len);
+               if (!prop) {
+                       info->missing = params[i].name;
                        return 0;
-               dest = info->params + dt_params[i].offset;
+               }
+               dest = info->params + params[i].offset;
                info->found++;
  
                val = of_read_number(prop, len / sizeof(u32));
  
-               if (dt_params[i].size == sizeof(u32))
+               if (params[i].size == sizeof(u32))
                        *(u32 *)dest = val;
                else
                        *(u64 *)dest = val;
  
                if (efi_enabled(EFI_DBG))
-                       pr_info("  %s: 0x%0*llx\n", dt_params[i].name,
-                               dt_params[i].size * 2, val);
+                       pr_info("  %s: 0x%0*llx\n", params[i].name,
+                               params[i].size * 2, val);
        }
        return 1;
  }
  
+ static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
+                                      int depth, void *data)
+ {
+       struct param_info *info = data;
+       int i;
+       for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
+               const char *subnode = dt_params[i].subnode;
+               if (depth != 1 || strcmp(uname, dt_params[i].uname) != 0) {
+                       info->missing = dt_params[i].params[0].name;
+                       continue;
+               }
+               if (subnode) {
+                       node = of_get_flat_dt_subnode_by_name(node, subnode);
+                       if (node < 0)
+                               return 0;
+               }
+               return __find_uefi_params(node, info, dt_params[i].params);
+       }
+       return 0;
+ }
  int __init efi_get_fdt_params(struct efi_fdt_params *params)
  {
        struct param_info info;
                pr_info("UEFI not found.\n");
        else if (!ret)
                pr_err("Can't find '%s' in device tree!\n",
-                      dt_params[info.found].name);
+                      info.missing);
  
        return ret;
  }
diff --combined kernel/sched/cputime.c
index ea0f6f31a2449440e502ee029eb2f3cfe2328c1b,8c4c6dcc052c867fe480b7541575dc66a2a7cb04..1934f658c03604272e5809f32fee1a6a3c928990
@@@ -49,12 -49,15 +49,12 @@@ DEFINE_PER_CPU(seqcount_t, irq_time_seq
   */
  void irqtime_account_irq(struct task_struct *curr)
  {
 -      unsigned long flags;
        s64 delta;
        int cpu;
  
        if (!sched_clock_irqtime)
                return;
  
 -      local_irq_save(flags);
 -
        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
        __this_cpu_add(irq_start_time, delta);
                __this_cpu_add(cpu_softirq_time, delta);
  
        irq_time_write_end();
 -      local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(irqtime_account_irq);
  
 -static int irqtime_account_hi_update(void)
 +static cputime_t irqtime_account_hi_update(cputime_t maxtime)
  {
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
 -      u64 latest_ns;
 -      int ret = 0;
 +      cputime_t irq_cputime;
  
        local_irq_save(flags);
 -      latest_ns = this_cpu_read(cpu_hardirq_time);
 -      if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
 -              ret = 1;
 +      irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
 +                    cpustat[CPUTIME_IRQ];
 +      irq_cputime = min(irq_cputime, maxtime);
 +      cpustat[CPUTIME_IRQ] += irq_cputime;
        local_irq_restore(flags);
 -      return ret;
 +      return irq_cputime;
  }
  
 -static int irqtime_account_si_update(void)
 +static cputime_t irqtime_account_si_update(cputime_t maxtime)
  {
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
 -      u64 latest_ns;
 -      int ret = 0;
 +      cputime_t softirq_cputime;
  
        local_irq_save(flags);
 -      latest_ns = this_cpu_read(cpu_softirq_time);
 -      if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
 -              ret = 1;
 +      softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
 +                        cpustat[CPUTIME_SOFTIRQ];
 +      softirq_cputime = min(softirq_cputime, maxtime);
 +      cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
        local_irq_restore(flags);
 -      return ret;
 +      return softirq_cputime;
  }
  
  #else /* CONFIG_IRQ_TIME_ACCOUNTING */
  
  #define sched_clock_irqtime   (0)
  
 +static cputime_t irqtime_account_hi_update(cputime_t dummy)
 +{
 +      return 0;
 +}
 +
 +static cputime_t irqtime_account_si_update(cputime_t dummy)
 +{
 +      return 0;
 +}
 +
  #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
  
  static inline void task_group_account_field(struct task_struct *p, int index,
@@@ -263,42 -257,29 +263,42 @@@ void account_idle_time(cputime_t cputim
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
  }
  
 -static __always_inline bool steal_account_process_tick(void)
 +static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
  {
  #ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
 +              cputime_t steal_cputime;
                u64 steal;
 -              unsigned long steal_jiffies;
  
                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;
  
 -              /*
 -               * steal is in nsecs but our caller is expecting steal
 -               * time in jiffies. Lets cast the result to jiffies
 -               * granularity and account the rest on the next rounds.
 -               */
 -              steal_jiffies = nsecs_to_jiffies(steal);
 -              this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
 +              steal_cputime = min(nsecs_to_cputime(steal), maxtime);
 +              account_steal_time(steal_cputime);
 +              this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
  
 -              account_steal_time(jiffies_to_cputime(steal_jiffies));
 -              return steal_jiffies;
 +              return steal_cputime;
        }
  #endif
 -      return false;
 +      return 0;
 +}
 +
 +/*
 + * Account how much elapsed time was spent in steal, irq, or softirq time.
 + */
 +static inline cputime_t account_other_time(cputime_t max)
 +{
 +      cputime_t accounted;
 +
 +      accounted = steal_account_process_time(max);
 +
 +      if (accounted < max)
 +              accounted += irqtime_account_hi_update(max - accounted);
 +
 +      if (accounted < max)
 +              accounted += irqtime_account_si_update(max - accounted);
 +
 +      return accounted;
  }
  
  /*
@@@ -361,23 -342,21 +361,23 @@@ void thread_group_cputime(struct task_s
  static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq, int ticks)
  {
 -      cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
 -      u64 cputime = (__force u64) cputime_one_jiffy;
 -      u64 *cpustat = kcpustat_this_cpu->cpustat;
 +      u64 cputime = (__force u64) cputime_one_jiffy * ticks;
 +      cputime_t scaled, other;
  
 -      if (steal_account_process_tick())
 +      /*
 +       * When returning from idle, many ticks can get accounted at
 +       * once, including some ticks of steal, irq, and softirq time.
 +       * Subtract those ticks from the amount of time accounted to
 +       * idle, or potentially user or system time. Due to rounding,
 +       * other time can exceed ticks occasionally.
 +       */
 +      other = account_other_time(cputime);
 +      if (other >= cputime)
                return;
 +      cputime -= other;
 +      scaled = cputime_to_scaled(cputime);
  
 -      cputime *= ticks;
 -      scaled *= ticks;
 -
 -      if (irqtime_account_hi_update()) {
 -              cpustat[CPUTIME_IRQ] += cputime;
 -      } else if (irqtime_account_si_update()) {
 -              cpustat[CPUTIME_SOFTIRQ] += cputime;
 -      } else if (this_cpu_ksoftirqd() == p) {
 +      if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time do not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
@@@ -427,10 -406,6 +427,10 @@@ void vtime_common_task_switch(struct ta
  }
  #endif
  
 +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 +
 +
 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  /*
   * Archs that account the whole time spent in the idle task
   * (outside irq) as idle time can rely on this and just implement
   * vtime_account().
   */
  #ifndef __ARCH_HAS_VTIME_ACCOUNT
 -void vtime_common_account_irq_enter(struct task_struct *tsk)
 +void vtime_account_irq_enter(struct task_struct *tsk)
  {
 -      if (!in_interrupt()) {
 -              /*
 -               * If we interrupted user, context_tracking_in_user()
 -               * is 1 because the context tracking don't hook
 -               * on irq entry/exit. This way we know if
 -               * we need to flush user time on kernel entry.
 -               */
 -              if (context_tracking_in_user()) {
 -                      vtime_account_user(tsk);
 -                      return;
 -              }
 -
 -              if (is_idle_task(tsk)) {
 -                      vtime_account_idle(tsk);
 -                      return;
 -              }
 -      }
 -      vtime_account_system(tsk);
 +      if (!in_interrupt() && is_idle_task(tsk))
 +              vtime_account_idle(tsk);
 +      else
 +              vtime_account_system(tsk);
  }
 -EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
 +EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
  #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 -
  
 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
  {
        *ut = p->utime;
@@@ -474,7 -466,7 +474,7 @@@ void thread_group_cputime_adjusted(stru
   */
  void account_process_tick(struct task_struct *p, int user_tick)
  {
 -      cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 +      cputime_t cputime, scaled, steal;
        struct rq *rq = this_rq();
  
        if (vtime_accounting_cpu_enabled())
                return;
        }
  
 -      if (steal_account_process_tick())
 +      cputime = cputime_one_jiffy;
 +      steal = steal_account_process_time(cputime);
 +
 +      if (steal >= cputime)
                return;
  
 +      cputime -= steal;
 +      scaled = cputime_to_scaled(cputime);
 +
        if (user_tick)
 -              account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
 +              account_user_time(p, cputime, scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
 -              account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
 -                                  one_jiffy_scaled);
 +              account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
        else
 -              account_idle_time(cputime_one_jiffy);
 +              account_idle_time(cputime);
  }
  
- /*
-  * Account multiple ticks of steal time.
-  * @p: the process from which the cpu time has been stolen
-  * @ticks: number of stolen ticks
-  */
- void account_steal_ticks(unsigned long ticks)
- {
-       account_steal_time(jiffies_to_cputime(ticks));
- }
  /*
   * Account multiple ticks of idle time.
   * @ticks: number of stolen ticks
@@@ -694,14 -671,12 +684,14 @@@ static cputime_t vtime_delta(struct tas
  static cputime_t get_vtime_delta(struct task_struct *tsk)
  {
        unsigned long now = READ_ONCE(jiffies);
 -      unsigned long delta = now - tsk->vtime_snap;
 +      cputime_t delta, other;
  
 +      delta = jiffies_to_cputime(now - tsk->vtime_snap);
 +      other = account_other_time(delta);
        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
        tsk->vtime_snap = now;
  
 -      return jiffies_to_cputime(delta);
 +      return delta - other;
  }
  
  static void __vtime_account_system(struct task_struct *tsk)
@@@ -721,6 -696,16 +711,6 @@@ void vtime_account_system(struct task_s
        write_seqcount_end(&tsk->vtime_seqcount);
  }
  
 -void vtime_gen_account_irq_exit(struct task_struct *tsk)
 -{
 -      write_seqcount_begin(&tsk->vtime_seqcount);
 -      if (vtime_delta(tsk))
 -              __vtime_account_system(tsk);
 -      if (context_tracking_in_user())
 -              tsk->vtime_snap_whence = VTIME_USER;
 -      write_seqcount_end(&tsk->vtime_seqcount);
 -}
 -
  void vtime_account_user(struct task_struct *tsk)
  {
        cputime_t delta_cpu;
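
The cputime rework backing the generic steal time support funnels steal, hardirq
and softirq time through a single clamped helper, so a tick can never account
more "other" time than actually elapsed. The tick-side pattern, condensed from
the hunks above, with an invented numeric example in the comment:

    /*
     * account_other_time(max) consumes at most 'max':
     * e.g. (invented numbers) max = 4 jiffies, steal = 3, hardirq = 2, softirq = 1
     *   -> steal gets 3, hardirq only the remaining 1, softirq gets 0,
     *      and nothing is left to charge to user/system/idle this tick.
     */
    other = account_other_time(cputime);
    if (other >= cputime)
            return;                 /* the whole tick was steal/irq/softirq time */
    cputime -= other;
    /* ...account the remainder to user, system or idle as before. */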