diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-09 18:48:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-09 18:48:37 -0700 |
commit | af3c8d98508d37541d4bf57f13a984a7f73a328c (patch) | |
tree | e8dd974d6ebccd38b1e373be8a5e4a2f8bf3c6ce /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | |
parent | d3e3b7eac886fb1383db2f22b81550fa6d87f62f (diff) | |
parent | 00fc2c26bc46a64545cdf95a1511461ea9acecb4 (diff) | |
download | linux-af3c8d98508d37541d4bf57f13a984a7f73a328c.tar.gz linux-af3c8d98508d37541d4bf57f13a984a7f73a328c.tar.xz |
Merge tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This is the main pull request for the drm, I think I've got one later
driver pull for mediatek SoC driver, I'm undecided on if it needs to
go to you yet.
Otherwise summary below:
Core drm:
- Atomic add driver private objects
- Deprecate preclose hook in modern drivers
- MST bandwidth tracking
- Use kvmalloc in more places
- Add mode_valid hook for crtc/encoder/bridge
- Reduce sync_file construction time
- Documentation updates
- New DRM synchronisation object support
New drivers:
- pl111 - pl111 CLCD display controller
Panel:
- Innolux P079ZCA panel driver
- Add NL12880B20-05, NL192108AC18-02D, P320HVN03 panels
- panel-samsung-s6e3ha2: Add s6e3hf2 panel support
i915:
- SKL+ watermark fixes
- G4x/G33 reset improvements
- DP AUX backlight improvements
- Buffer based GuC/host communication
- New getparam for (sub)slice infomation
- Cannonlake and Coffeelake initial patches
- Execbuf optimisations
radeon/amdgpu:
- Lots of Vega10 bug fixes
- Preliminary raven support
- KIQ support for compute rings
- MEC queue management rework
- DCE6 Audio support
- SR-IOV improvements
- Better radeon/amdgpu selection support
nouveau:
- HDMI stereoscopic support
- Display code rework for >= GM20x GPUs
msm:
- GEM rework for fine-grained locking
- Per-process pagetable work
- HDMI fixes for Snapdragon 820.
vc4:
- Remove 256MB CMA limit from vc4
- Add out-fence support
- Add support for cygnus
- Get/set tiling ioctls support
- Add T-format tiling support for scanout
zte:
- add VGA support.
etnaviv:
- Thermal throttle support for newer GPUs
- Restore userspace buffer cache performance
- dma-buf sync fix
stm:
- add stm32f429 display support
exynos:
- Rework vblank handling
- Fixup sw-trigger code
sun4i:
- V3s display engine support
- HDMI support for older SoCs
- Preliminary work on dual-pipeline SoCs.
rcar-du:
- VSP work
imx-drm:
- Remove counter load enable from PRE
- Double read/write reduction flag support
tegra:
- Documentation for the host1x and drm driver.
- Lots of staging ioctl fixes due to grate project work.
omapdrm:
- dma-buf fence support
- TILER rotation fixes"
* tag 'drm-for-v4.13' of git://people.freedesktop.org/~airlied/linux: (1270 commits)
drm: Remove unused drm_file parameter to drm_syncobj_replace_fence()
drm/amd/powerplay: fix bug fail to remove sysfs when rmmod amdgpu.
amdgpu: Set cik/si_support to 1 by default if radeon isn't built
drm/amdgpu/gfx9: fix driver reload with KIQ
drm/amdgpu/gfx8: fix driver reload with KIQ
drm/amdgpu: Don't call amd_powerplay_destroy() if we don't have powerplay
drm/ttm: Fix use-after-free in ttm_bo_clean_mm
drm/amd/amdgpu: move get memory type function from early init to sw init
drm/amdgpu/cgs: always set reference clock in mode_info
drm/amdgpu: fix vblank_time when displays are off
drm/amd/powerplay: power value format change for Vega10
drm/amdgpu/gfx9: support the amdgpu.disable_cu option
drm/amd/powerplay: change PPSMC_MSG_GetCurrPkgPwr for Vega10
drm/amdgpu: Make amdgpu_cs_parser_init static (v2)
drm/amdgpu/cs: fix a typo in a comment
drm/amdgpu: Fix the exported always on CU bitmap
drm/amdgpu/gfx9: gfx_v9_0_enable_gfx_static_mg_power_gating() can be static
drm/amdgpu/psp: upper_32_bits/lower_32_bits for address setup
drm/amd/powerplay/cz: print message if smc message fails
drm/amdgpu: fix typo in amdgpu_debugfs_test_ib_init
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 101 |
1 files changed, 63 insertions, 38 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index dba8a5b25e66..5f8ada1d872b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -24,6 +24,7 @@ #include "amd_shared.h" #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_gfx.h" #include <linux/module.h> const struct kfd2kgd_calls *kfd2kgd; @@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void) return ret; } -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) { - switch (rdev->asic_type) { + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); @@ -86,59 +87,83 @@ void amdgpu_amdkfd_fini(void) } } -void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { if (kgd2kfd) - rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, - rdev->pdev, kfd2kgd); + adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); } -void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - if (rdev->kfd) { + int i; + int last_valid_bit; + if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - - .first_compute_pipe = 1, - .compute_pipe_count = 4 - 1, + .num_mec = adev->gfx.mec.num_mec, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; - amdgpu_doorbell_get_kfd_info(rdev, + /* this is going to have a few of the MSBs set that we need to + * clear */ + bitmap_complement(gpu_resources.queue_bitmap, + adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + + /* remove the KIQ bit as well */ + if (adev->gfx.kiq.ring.ready) + clear_bit(amdgpu_gfx_queue_to_bit(adev, + adev->gfx.kiq.ring.me - 1, + adev->gfx.kiq.ring.pipe, + adev->gfx.kiq.ring.queue), + gpu_resources.queue_bitmap); + + /* According to linux/bitmap.h we shouldn't use bitmap_clear if + * nbits is not compile time constant */ + last_valid_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + clear_bit(i, gpu_resources.queue_bitmap); + + amdgpu_doorbell_get_kfd_info(adev, &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - kgd2kfd->device_init(rdev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd, &gpu_resources); } } -void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (rdev->kfd) { - kgd2kfd->device_exit(rdev->kfd); - rdev->kfd = NULL; + if (adev->kfd) { + kgd2kfd->device_exit(adev->kfd); + adev->kfd = NULL; } } -void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, +void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { - if (rdev->kfd) - kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); + if (adev->kfd) + kgd2kfd->interrupt(adev->kfd, ih_ring_entry); } -void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev) +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (rdev->kfd) - kgd2kfd->suspend(rdev->kfd); + if (adev->kfd) + kgd2kfd->suspend(adev->kfd); } -int amdgpu_amdkfd_resume(struct amdgpu_device *rdev) +int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; - if (rdev->kfd) - r = kgd2kfd->resume(rdev->kfd); + if (adev->kfd) + r = kgd2kfd->resume(adev->kfd); return r; } @@ -147,7 +172,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct kgd_mem **mem = (struct kgd_mem **) mem_obj; int r; @@ -159,10 +184,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, if ((*mem) == NULL) return -ENOMEM; - r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); if (r) { - dev_err(rdev->dev, + dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); return r; } @@ -170,21 +195,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, /* map the buffer */ r = amdgpu_bo_reserve((*mem)->bo, true); if (r) { - dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); + dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, &(*mem)->gpu_addr); if (r) { - dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); + dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } *gpu_addr = (*mem)->gpu_addr; r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); if (r) { - dev_err(rdev->dev, + dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } @@ -220,27 +245,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) uint64_t get_vmem_size(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; BUG_ON(kgd == NULL); - return rdev->mc.real_vram_size; + return adev->mc.real_vram_size; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (rdev->gfx.funcs->get_gpu_clock_counter) - return rdev->gfx.funcs->get_gpu_clock_counter(rdev); + if (adev->gfx.funcs->get_gpu_clock_counter) + return adev->gfx.funcs->get_gpu_clock_counter(adev); return 0; } uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* The sclk is in quantas of 10kHz */ - return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; + return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; } |