diff options
author | Dave Airlie <airlied@redhat.com> | 2016-11-11 09:25:32 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2016-11-11 09:25:32 +1000 |
commit | db8feb6979e91c2e916631a75dbfe9f10f6b05e5 (patch) | |
tree | b4aa5965f207c18d908a794e5f4e647604d77553 /drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
parent | afdd548f742ca454fc343696de472f3aaa5dc488 (diff) | |
parent | 58e197d631d44f9f4817b8198b43132a40de1164 (diff) | |
download | linux-db8feb6979e91c2e916631a75dbfe9f10f6b05e5.tar.gz linux-db8feb6979e91c2e916631a75dbfe9f10f6b05e5.tar.xz |
Merge tag 'drm-intel-next-2016-11-08' of git://anongit.freedesktop.org/git/drm-intel into drm-next
- gpu idling rework for s/r (Imre)
- vlv mappable scanout fix
- speed up probing in resume (Lyude)
- dp audio workarounds for gen9 (Dhinakaran)
- more conversion to using dev_priv internally (Ville)
- more gen9+ wm fixes and cleanups (Maarten)
- shrinker cleanup&fixes (Chris)
- reorg plane init code (Ville)
- implement support for multiple timelines (prep work for scheduler)
from Chris and all
- untangle dev->struct_mutex locking as prep for multiple timelines
(Chris)
- refactor bxt phy code and collect it all in intel_dpio_phy.c (Ander)
- another gvt with bugfixes all over from Zhenyu
- piles of lspcon fixes from Imre
- 90/270 rotation fixes (Ville)
- guc log buffer support (Akash+Sagar)
- fbc fixes from Paulo
- untangle rpm vs. tiling-fences/mmaps (Chris)
- fix atomic commit to wait on the right fences (Daniel Stone)
* tag 'drm-intel-next-2016-11-08' of git://anongit.freedesktop.org/git/drm-intel: (181 commits)
drm/i915: Update DRIVER_DATE to 20161108
drm/i915: Mark CPU cache as dirty when used for rendering
drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode
drm/i915: Make sure engines are idle during GPU idling in LR mode
drm/i915: Avoid early GPU idling due to race with new request
drm/i915: Avoid early GPU idling due to already pending idle work
drm/i915: Limit Valleyview and earlier to only using mappable scanout
drm/i915: Round tile chunks up for constructing partial VMAs
drm/i915: Remove the vma from the object list upon close
drm/i915: Reinit polling before hpd when resuming
drm/i915: Remove redundant reprobe in i915_drm_resume
drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms
drm/i915/dp: BDW cdclk fix for DP audio
drm/i915: Fix pages pin counting around swizzle quirk
drm/i915: Fix test on inputs for vma_compare()
drm/i915/guc: Cache the client mapping
drm/i915: Tidy slab cache allocations
drm/i915: Introduce HAS_64BIT_RELOC
drm/i915: Show the execlist queue in debugfs/i915_engine_info
drm/i915: Unify global_list into global_link
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 77 |
1 files changed, 22 insertions, 55 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e52affdcc125..fb5b44339f71 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,7 +34,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -332,7 +331,8 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->page = -1; cache->vaddr = 0; cache->i915 = i915; - cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; + /* Must be a variable in the struct to allow GCC to unroll. */ + cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); cache->node.allocated = false; } @@ -418,15 +418,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, unsigned long offset; void *vaddr; - if (cache->node.allocated) { - wmb(); - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, page), - cache->node.start, I915_CACHE_NONE, 0); - cache->page = page; - return unmask_page(cache->vaddr); - } - if (cache->vaddr) { io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); } else { @@ -466,6 +457,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset = cache->node.start; if (cache->node.allocated) { + wmb(); ggtt->base.insert_page(&ggtt->base, i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); @@ -1109,44 +1101,20 @@ err: return ret; } -static unsigned int eb_other_engines(struct drm_i915_gem_request *req) -{ - unsigned int mask; - - mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; - mask <<= I915_BO_ACTIVE_SHIFT; - - return mask; -} - static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; int ret; list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - struct reservation_object *resv; - if (obj->flags & other_rings) { - ret = i915_gem_request_await_object - (req, obj, obj->base.pending_write_domain); - if (ret) - return ret; - } - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - ret = i915_sw_fence_await_reservation - (&req->submit, resv, &i915_fence_ops, - obj->base.pending_write_domain, 10*HZ, - GFP_KERNEL | __GFP_NOWARN); - if (ret < 0) - return ret; - } + ret = i915_gem_request_await_object + (req, obj, obj->base.pending_write_domain); + if (ret) + return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj, false); @@ -1279,6 +1247,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) @@ -1288,8 +1262,6 @@ void i915_vma_move_to_active(struct i915_vma *vma, GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - obj->dirty = 1; /* be paranoid */ - /* Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is * vital here: mark_active adds to the start of the callback list, @@ -1297,37 +1269,32 @@ void i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_object_is_active(obj)) - i915_gem_object_get(obj); - i915_gem_object_set_active(obj, idx); - i915_gem_active_set(&obj->last_read[idx], req); + if (!i915_vma_is_active(vma)) + obj->active_count++; + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); + list_move_tail(&vma->vm_link, &vma->vm->active_list); if (flags & EXEC_OBJECT_WRITE) { - i915_gem_active_set(&obj->last_write, req); + i915_gem_active_set(&vma->last_write, req); intel_fb_obj_invalidate(obj, ORIGIN_CS); /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + if (!obj->cache_dirty && gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; } if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_gem_active_set(&vma->last_fence, req); - - i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], req); - list_move_tail(&vma->vm_link, &vma->vm->active_list); } static void eb_export_fence(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *req, unsigned int flags) { - struct reservation_object *resv; - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (!resv) - return; + struct reservation_object *resv = obj->resv; /* Ignore errors from failing to allocate the new fence, we can't * handle an error right now. Worst case should be missed |