drm/amdkfd: map multiple processes to HW scheduler
author Felix Kuehling <Felix.Kuehling@amd.com>
Mon, 27 Nov 2017 23:29:45 +0000 (18:29 -0500)
committer Oded Gabbay <oded.gabbay@gmail.com>
Mon, 27 Nov 2017 23:29:45 +0000 (18:29 -0500)
Allow HWS to execute multiple processes on the hardware
concurrently. The number of concurrent processes is limited by
the number of VMIDs allocated to the HWS.

A module parameter can be used to limit this further or to turn
it off altogether (mainly for debugging purposes).

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_module.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 4f05eacca786a0f10d29cf697a3ee53e873513c4..a8fa33a08de301fde244f1584e8a9edeeea40775 100644 (file)
@@ -238,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
        kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
                        - kfd->vm_info.first_vmid_kfd + 1;
 
+       /* Verify module parameters regarding mapped process number*/
+       if ((hws_max_conc_proc < 0)
+                       || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
+               dev_err(kfd_device,
+                       "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
+                       hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
+                       kfd->vm_info.vmid_num_kfd);
+               kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
+       } else
+               kfd->max_proc_per_quantum = hws_max_conc_proc;
+
        /* calculate max size of mqds needed for queues */
        size = max_num_of_queues_per_device *
                        kfd->device_info->mqd_size_aligned;
index ee8adf654cd007320b7610a3b69a5feb5551d808..4e060c864c214a15e7a9028e9ed9c9646dc3cdbd 100644 (file)
@@ -50,6 +50,11 @@ module_param(sched_policy, int, 0444);
 MODULE_PARM_DESC(sched_policy,
        "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
 
+int hws_max_conc_proc = 8;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+       "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
 int cwsr_enable = 1;
 module_param(cwsr_enable, int, 0444);
 MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
index 69c147a83591801a4cac386b33adee40eb3c062c..0b7092e59bf948145356efa15f94804b2de8ef02 100644 (file)
@@ -57,13 +57,24 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 {
        unsigned int process_count, queue_count;
        unsigned int map_queue_size;
+       unsigned int max_proc_per_quantum = 1;
+       struct kfd_dev *dev = pm->dqm->dev;
 
        process_count = pm->dqm->processes_count;
        queue_count = pm->dqm->queue_count;
 
-       /* check if there is over subscription*/
+       /* check if there is over subscription
+        * Note: the arbitration between the number of VMIDs and
+        * hws_max_conc_proc has been done in
+        * kgd2kfd_device_init().
+        */
        *over_subscription = false;
-       if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
+
+       if (dev->max_proc_per_quantum > 1)
+               max_proc_per_quantum = dev->max_proc_per_quantum;
+
+       if ((process_count > max_proc_per_quantum) ||
+           queue_count > get_queues_num(pm->dqm)) {
                *over_subscription = true;
                pr_debug("Over subscribed runlist\n");
        }
@@ -116,10 +127,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
                        uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
        struct pm4_mes_runlist *packet;
+       int concurrent_proc_cnt = 0;
+       struct kfd_dev *kfd = pm->dqm->dev;
 
        if (WARN_ON(!ib))
                return -EFAULT;
 
+       /* Determine the number of processes to map together to HW:
+        * it can not exceed the number of VMIDs available to the
+        * scheduler, and it is determined by the smaller of the number
+        * of processes in the runlist and kfd module parameter
+        * hws_max_conc_proc.
+        * Note: the arbitration between the number of VMIDs and
+        * hws_max_conc_proc has been done in
+        * kgd2kfd_device_init().
+        */
+       concurrent_proc_cnt = min(pm->dqm->processes_count,
+                       kfd->max_proc_per_quantum);
+
        packet = (struct pm4_mes_runlist *)buffer;
 
        memset(buffer, 0, sizeof(struct pm4_mes_runlist));
@@ -130,6 +155,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
        packet->bitfields4.chain = chain ? 1 : 0;
        packet->bitfields4.offload_polling = 0;
        packet->bitfields4.valid = 1;
+       packet->bitfields4.process_cnt = concurrent_proc_cnt;
        packet->ordinal2 = lower_32_bits(ib);
        packet->bitfields3.ib_base_hi = upper_32_bits(ib);
 
index a668764679958bf66e0a50283dc884c7e352ffe4..1edab2199f0b8ef66dbc12b7b83e3f2f58b866e0 100644 (file)
@@ -88,6 +88,12 @@ extern int max_num_of_queues_per_device;
 /* Kernel module parameter to specify the scheduling policy */
 extern int sched_policy;
 
+/*
+ * Kernel module parameter to specify the maximum process
+ * number per HW scheduler
+ */
+extern int hws_max_conc_proc;
+
 extern int cwsr_enable;
 
 /*
@@ -214,6 +220,9 @@ struct kfd_dev {
        /* Debug manager */
        struct kfd_dbgmgr           *dbgmgr;
 
+       /* Maximum process number mapped to HW scheduler */
+       unsigned int max_proc_per_quantum;
+
        /* CWSR */
        bool cwsr_enabled;
        const void *cwsr_isa;