summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2017-07-09 18:48:37 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-07-09 18:48:37 -0700
commit af3c8d98508d37541d4bf57f13a984a7f73a328c (patch)
tree e8dd974d6ebccd38b1e373be8a5e4a2f8bf3c6ce /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
parent d3e3b7eac886fb1383db2f22b81550fa6d87f62f (diff)
parent 00fc2c26bc46a64545cdf95a1511461ea9acecb4 (diff)
download linux-af3c8d98508d37541d4bf57f13a984a7f73a328c.tar.gz
linux-af3c8d98508d37541d4bf57f13a984a7f73a328c.tar.xz
Merge tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main pull request for the drm, I think I've got one later driver pull for mediatek SoC driver, I'm undecided on if it needs to go to you yet. Otherwise summary below: Core drm: - Atomic add driver private objects - Deprecate preclose hook in modern drivers - MST bandwidth tracking - Use kvmalloc in more places - Add mode_valid hook for crtc/encoder/bridge - Reduce sync_file construction time - Documentation updates - New DRM synchronisation object support New drivers: - pl111 - pl111 CLCD display controller Panel: - Innolux P079ZCA panel driver - Add NL12880B20-05, NL192108AC18-02D, P320HVN03 panels - panel-samsung-s6e3ha2: Add s6e3hf2 panel support i915: - SKL+ watermark fixes - G4x/G33 reset improvements - DP AUX backlight improvements - Buffer based GuC/host communication - New getparam for (sub)slice information - Cannonlake and Coffeelake initial patches - Execbuf optimisations radeon/amdgpu: - Lots of Vega10 bug fixes - Preliminary raven support - KIQ support for compute rings - MEC queue management rework - DCE6 Audio support - SR-IOV improvements - Better radeon/amdgpu selection support nouveau: - HDMI stereoscopic support - Display code rework for >= GM20x GPUs msm: - GEM rework for fine-grained locking - Per-process pagetable work - HDMI fixes for Snapdragon 820. vc4: - Remove 256MB CMA limit from vc4 - Add out-fence support - Add support for cygnus - Get/set tiling ioctls support - Add T-format tiling support for scanout zte: - add VGA support. etnaviv: - Thermal throttle support for newer GPUs - Restore userspace buffer cache performance - dma-buf sync fix stm: - add stm32f429 display support exynos: - Rework vblank handling - Fixup sw-trigger code sun4i: - V3s display engine support - HDMI support for older SoCs - Preliminary work on dual-pipeline SoCs. 
rcar-du: - VSP work imx-drm: - Remove counter load enable from PRE - Double read/write reduction flag support tegra: - Documentation for the host1x and drm driver. - Lots of staging ioctl fixes due to grate project work. omapdrm: - dma-buf fence support - TILER rotation fixes" * tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux: (1270 commits) drm: Remove unused drm_file parameter to drm_syncobj_replace_fence() drm/amd/powerplay: fix bug fail to remove sysfs when rmmod amdgpu. amdgpu: Set cik/si_support to 1 by default if radeon isn't built drm/amdgpu/gfx9: fix driver reload with KIQ drm/amdgpu/gfx8: fix driver reload with KIQ drm/amdgpu: Don't call amd_powerplay_destroy() if we don't have powerplay drm/ttm: Fix use-after-free in ttm_bo_clean_mm drm/amd/amdgpu: move get memory type function from early init to sw init drm/amdgpu/cgs: always set reference clock in mode_info drm/amdgpu: fix vblank_time when displays are off drm/amd/powerplay: power value format change for Vega10 drm/amdgpu/gfx9: support the amdgpu.disable_cu option drm/amd/powerplay: change PPSMC_MSG_GetCurrPkgPwr for Vega10 drm/amdgpu: Make amdgpu_cs_parser_init static (v2) drm/amdgpu/cs: fix a typo in a comment drm/amdgpu: Fix the exported always on CU bitmap drm/amdgpu/gfx9: gfx_v9_0_enable_gfx_static_mg_power_gating() can be static drm/amdgpu/psp: upper_32_bits/lower_32_bits for address setup drm/amd/powerplay/cz: print message if smc message fails drm/amdgpu: fix typo in amdgpu_debugfs_test_ib_init ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 101
1 files changed, 63 insertions, 38 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b25e66..5f8ada1d872b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -24,6 +24,7 @@
#include "amd_shared.h"
#include <drm/drmP.h>
#include "amdgpu.h"
+#include "amdgpu_gfx.h"
#include <linux/module.h>
const struct kfd2kgd_calls *kfd2kgd;
@@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void)
return ret;
}
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
{
- switch (rdev->asic_type) {
+ switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@@ -86,59 +87,83 @@ void amdgpu_amdkfd_fini(void)
}
}
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
if (kgd2kfd)
- rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
- rdev->pdev, kfd2kgd);
+ adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+ adev->pdev, kfd2kgd);
}
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
- if (rdev->kfd) {
+ int i;
+ int last_valid_bit;
+ if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
- .first_compute_pipe = 1,
- .compute_pipe_count = 4 - 1,
+ .num_mec = adev->gfx.mec.num_mec,
+ .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+ .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
- amdgpu_doorbell_get_kfd_info(rdev,
+ /* this is going to have a few of the MSBs set that we need to
+ * clear */
+ bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+ /* remove the KIQ bit as well */
+ if (adev->gfx.kiq.ring.ready)
+ clear_bit(amdgpu_gfx_queue_to_bit(adev,
+ adev->gfx.kiq.ring.me - 1,
+ adev->gfx.kiq.ring.pipe,
+ adev->gfx.kiq.ring.queue),
+ gpu_resources.queue_bitmap);
+
+ /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+ * nbits is not compile time constant */
+ last_valid_bit = adev->gfx.mec.num_mec
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+ clear_bit(i, gpu_resources.queue_bitmap);
+
+ amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
- kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+ kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
}
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
- if (rdev->kfd) {
- kgd2kfd->device_exit(rdev->kfd);
- rdev->kfd = NULL;
+ if (adev->kfd) {
+ kgd2kfd->device_exit(adev->kfd);
+ adev->kfd = NULL;
}
}
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
{
- if (rdev->kfd)
- kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+ if (adev->kfd)
+ kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
}
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
{
- if (rdev->kfd)
- kgd2kfd->suspend(rdev->kfd);
+ if (adev->kfd)
+ kgd2kfd->suspend(adev->kfd);
}
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
{
int r = 0;
- if (rdev->kfd)
- r = kgd2kfd->resume(rdev->kfd);
+ if (adev->kfd)
+ r = kgd2kfd->resume(adev->kfd);
return r;
}
@@ -147,7 +172,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
{
- struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
int r;
@@ -159,10 +184,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
if ((*mem) == NULL)
return -ENOMEM;
- r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
if (r) {
- dev_err(rdev->dev,
+ dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
return r;
}
@@ -170,21 +195,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
/* map the buffer */
r = amdgpu_bo_reserve((*mem)->bo, true);
if (r) {
- dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+ dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;
}
r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
&(*mem)->gpu_addr);
if (r) {
- dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+ dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
*gpu_addr = (*mem)->gpu_addr;
r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
if (r) {
- dev_err(rdev->dev,
+ dev_err(adev->dev,
"(%d) failed to map bo to kernel for amdkfd\n", r);
goto allocate_mem_kmap_bo_failed;
}
@@ -220,27 +245,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
uint64_t get_vmem_size(struct kgd_dev *kgd)
{
- struct amdgpu_device *rdev =
+ struct amdgpu_device *adev =
(struct amdgpu_device *)kgd;
BUG_ON(kgd == NULL);
- return rdev->mc.real_vram_size;
+ return adev->mc.real_vram_size;
}
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
{
- struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- if (rdev->gfx.funcs->get_gpu_clock_counter)
- return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
+ if (adev->gfx.funcs->get_gpu_clock_counter)
+ return adev->gfx.funcs->get_gpu_clock_counter(adev);
return 0;
}
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
- struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
/* The sclk is in quantas of 10kHz */
- return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+ return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
}