diff options
author | Dave Airlie <airlied@redhat.com> | 2018-02-16 09:29:27 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-02-16 09:29:27 +1000 |
commit | 76ea0f334e7fb13226e64ee7de928611f5303faf (patch) | |
tree | 4a6b8d8f1b19a68072f2ce8f33add59be9a67be7 /drivers/gpu/drm/vc4 | |
parent | f0308d76906a5b65ec4fcc3b133394caa9f00638 (diff) | |
parent | 1bc3d3cce8c3b44c2b5ac6cee98c830bb40e6b0f (diff) | |
download | linux-76ea0f334e7fb13226e64ee7de928611f5303faf.tar.gz linux-76ea0f334e7fb13226e64ee7de928611f5303faf.tar.xz |
Merge tag 'drm-misc-next-2018-02-13' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for 4.17:
UAPI Changes:
- drm/vc4: Expose performance counters to userspace (Boris)
Cross-subsystem Changes:
- MAINTAINERS: Linus to maintain panel-arm-versatile in -misc (Linus)
Core Changes:
- Only use swiotlb when necessary (Chunming)
Driver Changes:
- drm/panel: Add support for ARM Versatile panels (Linus)
- pl111: Improvements around versatile panel support (Linus)
----------------------------------------
Tagged on 2018-02-06:
drm-misc-next for 4.17:
UAPI Changes:
- Validate mode flags + type (Ville)
- Deprecate unused mode flags PIXMUX, BCAST (Ville)
- Deprecate unused mode types BUILTIN, CRTC_C, CLOCK_C, DEFAULT (Ville)
Cross-subsystem Changes:
- MAINTAINERS: s/Daniel/Maarten/ for drm-misc (Daniel)
Core Changes:
- gem: Export gem functions for drivers to use (Samuel)
- bridge: Introduce bridge timings in drm_bridge (Linus)
- dma-buf: Allow exclusive fence to be bundled in fence array when
calling reservation_object_get_fences_rcu (Christian)
- dp: Add training pattern 4 and HBR3 support to dp helpers (Manasi)
- fourcc: Add alpha bit to formats to avoid driver format LUTs (Maxime)
- mode: Various cleanups + add new device-wide .mode_valid hook (Ville)
- atomic: Fix state leak when non-blocking commits fail (Leo)
NOTE: IIRC, this was cross-picked to -fixes so it might fall out
- crc: Allow polling on the data fd (Maarten)
Driver Changes:
- bridge/vga-dac: Add THS8134* support (Linus)
- tinydrm: Various MIPI DBI improvements/cleanups (Noralf)
- bridge/dw-mipi-dsi: Cleanups + use create_packet helper (Brian)
- drm/sun4i: Add Display Engine frontend support (Maxime)
- drm/sun4i: Add zpos support + increase num planes from 2 to 4 (Maxime)
- various: Use drm_mode_get_hv_timing() to fill plane clip rectangle (Ville)
- stm: Add 8-bit clut support, add dsi phy v1.31 support, +fixes (Philippe)
Cc: Boris Brezillon <boris.brezillon@free-electrons.com>
Cc: Chunming Zhou <david1.zhou@amd.com>
Cc: Samuel Li <Samuel.Li@amd.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Noralf Trønnes <noralf@tronnes.org>
Cc: Brian Norris <briannorris@chromium.org>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Philippe Cornu <philippe.cornu@st.com>
Cc: Leo (Sunpeng) Li <sunpeng.li@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
* tag 'drm-misc-next-2018-02-13' of git://anongit.freedesktop.org/drm/drm-misc: (115 commits)
drm/radeon: only enable swiotlb path when need v2
drm/amdgpu: only enable swiotlb alloc when need v2
drm: add func to get max iomem address v2
drm/vc4: Expose performance counters to userspace
drm: Print the pid when debug logging an ioctl error.
drm/stm: ltdc: remove non-alpha color formats on layer 2 for older hw
drm/stm: ltdc: add non-alpha color formats
drm/bridge/synopsys: dsi: Add 1.31 version support
drm/bridge/synopsys: dsi: Add read feature
drm/pl111: Support multiple endpoints on the CLCD
drm/pl111: Support variants with broken VBLANK
drm/pl111: Support variants with broken clock divider
drm/pl111: Handle the Versatile RGB/BGR565 mode
drm/pl111: Properly detect the ARM PL110 variants
drm/panel: Add support for ARM Versatile panels
drm/panel: Device tree bindings for ARM Versatile panels
drm/bridge: Rename argument from crtc to bridge
drm/crc: Add support for polling on the data fd.
drm/sun4i: Use drm_mode_get_hv_timing() to populate plane clip rectangle
drm/rcar-du: Use drm_mode_get_hv_timing() to populate plane clip rectangle
...
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r-- | drivers/gpu/drm/vc4/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.c | 26 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 68 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 48 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_irq.c | 40 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_perfmon.c | 188 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_plane.c | 23 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_regs.h | 35 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 64 |
9 files changed, 409 insertions, 84 deletions
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index f5500df51686..4a3a868235f8 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -15,6 +15,7 @@ vc4-y := \ vc4_vec.o \ vc4_hvs.o \ vc4_irq.o \ + vc4_perfmon.o \ vc4_plane.o \ vc4_render_cl.o \ vc4_trace_points.o \ diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index ceb385fd69c5..94b99c90425a 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -101,6 +101,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, case DRM_VC4_PARAM_SUPPORTS_THREADED_FS: case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER: case DRM_VC4_PARAM_SUPPORTS_MADVISE: + case DRM_VC4_PARAM_SUPPORTS_PERFMON: args->value = true; break; default: @@ -111,6 +112,26 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, return 0; } +static int vc4_open(struct drm_device *dev, struct drm_file *file) +{ + struct vc4_file *vc4file; + + vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL); + if (!vc4file) + return -ENOMEM; + + vc4_perfmon_open_file(vc4file); + file->driver_priv = vc4file; + return 0; +} + +static void vc4_close(struct drm_device *dev, struct drm_file *file) +{ + struct vc4_file *vc4file = file->driver_priv; + + vc4_perfmon_close_file(vc4file); +} + static const struct vm_operations_struct vc4_vm_ops = { .fault = vc4_fault, .open = drm_gem_vm_open, @@ -143,6 +164,9 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_PERFMON_CREATE, vc4_perfmon_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_PERFMON_DESTROY, vc4_perfmon_destroy_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), }; static 
struct drm_driver vc4_drm_driver = { @@ -153,6 +177,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_RENDER | DRIVER_PRIME), .lastclose = drm_fb_helper_lastclose, + .open = vc4_open, + .postclose = vc4_close, .irq_handler = vc4_irq, .irq_preinstall = vc4_irq_preinstall, .irq_postinstall = vc4_irq_postinstall, diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 3af22936d9b3..fefa1664a9f5 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -11,6 +11,8 @@ #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> +#include "uapi/drm/vc4_drm.h" + /* Don't forget to update vc4_bo.c: bo_type_names[] when adding to * this. */ @@ -29,6 +31,36 @@ enum vc4_kernel_bo_type { VC4_BO_TYPE_COUNT }; +/* Performance monitor object. The perform lifetime is controlled by userspace + * using perfmon related ioctls. A perfmon can be attached to a submit_cl + * request, and when this is the case, HW perf counters will be activated just + * before the submit_cl is submitted to the GPU and disabled when the job is + * done. This way, only events related to a specific job will be counted. + */ +struct vc4_perfmon { + /* Tracks the number of users of the perfmon, when this counter reaches + * zero the perfmon is destroyed. + */ + refcount_t refcnt; + + /* Number of counters activated in this perfmon instance + * (should be less than DRM_VC4_MAX_PERF_COUNTERS). + */ + u8 ncounters; + + /* Events counted by the HW perf counters. */ + u8 events[DRM_VC4_MAX_PERF_COUNTERS]; + + /* Storage for counter values. Counters are incremented by the HW + * perf counter values every time the perfmon is attached to a GPU job. + * This way, perfmon users don't have to retrieve the results after + * each job if they want to track events covering several submissions. + * Note that counter values can't be reset, but you can fake a reset by + * destroying the perfmon and creating a new one. 
+ */ + u64 counters[0]; +}; + struct vc4_dev { struct drm_device *dev; @@ -121,6 +153,11 @@ struct vc4_dev { wait_queue_head_t job_wait_queue; struct work_struct job_done_work; + /* Used to track the active perfmon if any. Access to this field is + * protected by job_lock. + */ + struct vc4_perfmon *active_perfmon; + /* List of struct vc4_seqno_cb for callbacks to be made from a * workqueue when the given seqno is passed. */ @@ -406,6 +443,21 @@ struct vc4_exec_info { void *uniforms_v; uint32_t uniforms_p; uint32_t uniforms_size; + + /* Pointer to a performance monitor object if the user requested it, + * NULL otherwise. + */ + struct vc4_perfmon *perfmon; +}; + +/* Per-open file private data. Any driver-specific resource that has to be + * released when the DRM file is closed should be placed here. + */ +struct vc4_file { + struct { + struct idr idr; + struct mutex lock; + } perfmon; }; static inline struct vc4_exec_info * @@ -646,3 +698,19 @@ bool vc4_check_tex_size(struct vc4_exec_info *exec, /* vc4_validate_shader.c */ struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj); + +/* vc4_perfmon.c */ +void vc4_perfmon_get(struct vc4_perfmon *perfmon); +void vc4_perfmon_put(struct vc4_perfmon *perfmon); +void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon); +void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, + bool capture); +struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id); +void vc4_perfmon_open_file(struct vc4_file *vc4file); +void vc4_perfmon_close_file(struct vc4_file *vc4file); +int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 
c94cce96544c..2107b0daf8ef 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -467,14 +467,30 @@ again: vc4_flush_caches(dev); + /* Only start the perfmon if it was not already started by a previous + * job. + */ + if (exec->perfmon && vc4->active_perfmon != exec->perfmon) + vc4_perfmon_start(vc4, exec->perfmon); + /* Either put the job in the binner if it uses the binner, or * immediately move it to the to-be-rendered queue. */ if (exec->ct0ca != exec->ct0ea) { submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); } else { + struct vc4_exec_info *next; + vc4_move_job_to_render(dev, exec); - goto again; + next = vc4_first_bin_job(vc4); + + /* We can't start the next bin job if the previous job had a + * different perfmon instance attached to it. The same goes + * if one of them had a perfmon attached to it and the other + * one doesn't. + */ + if (next && next->perfmon == exec->perfmon) + goto again; } } @@ -642,6 +658,7 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, struct ww_acquire_ctx *acquire_ctx) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *renderjob; uint64_t seqno; unsigned long irqflags; struct vc4_fence *fence; @@ -667,11 +684,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, list_add_tail(&exec->head, &vc4->bin_job_list); - /* If no job was executing, kick ours off. Otherwise, it'll - * get started when the previous job's flush done interrupt - * occurs. + /* If no bin job was executing and if the render job (if any) has the + * same perfmon as our job attached to it (or if both jobs don't have + * perfmon activated), then kick ours off. Otherwise, it'll get + * started when the previous job's flush/render done interrupt occurs. 
*/ - if (vc4_first_bin_job(vc4) == exec) { + renderjob = vc4_first_render_job(vc4); + if (vc4_first_bin_job(vc4) == exec && + (!renderjob || renderjob->perfmon == exec->perfmon)) { vc4_submit_next_bin_job(dev); vc4_queue_hangcheck(dev); } @@ -936,6 +956,9 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) vc4->bin_alloc_used &= ~exec->bin_slots; spin_unlock_irqrestore(&vc4->job_lock, irqflags); + /* Release the reference we had on the perf monitor. */ + vc4_perfmon_put(exec->perfmon); + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -1088,6 +1111,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; @@ -1101,6 +1125,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + if (args->pad2 != 0) { + DRM_DEBUG("->pad2 must be set to zero\n"); + return -EINVAL; + } + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1126,6 +1155,15 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->perfmonid) { + exec->perfmon = vc4_perfmon_find(vc4file, + args->perfmonid); + if (!exec->perfmon) { + ret = -ENOENT; + goto fail; + } + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 3dd62d75f531..4cd2ccfe15f4 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -104,13 +104,20 @@ static void vc4_irq_finish_bin_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_exec_info *exec = vc4_first_bin_job(vc4); + struct vc4_exec_info *next, *exec = vc4_first_bin_job(vc4); if (!exec) return; 
vc4_move_job_to_render(dev, exec); - vc4_submit_next_bin_job(dev); + next = vc4_first_bin_job(vc4); + + /* Only submit the next job in the bin list if it matches the perfmon + * attached to the one that just finished (or if both jobs don't have + * perfmon attached to them). + */ + if (next && next->perfmon == exec->perfmon) + vc4_submit_next_bin_job(dev); } static void @@ -122,6 +129,10 @@ vc4_cancel_bin_job(struct drm_device *dev) if (!exec) return; + /* Stop the perfmon so that the next bin job can be started. */ + if (exec->perfmon) + vc4_perfmon_stop(vc4, exec->perfmon, false); + list_move_tail(&exec->head, &vc4->bin_job_list); vc4_submit_next_bin_job(dev); } @@ -131,18 +142,41 @@ vc4_irq_finish_render_job(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *exec = vc4_first_render_job(vc4); + struct vc4_exec_info *nextbin, *nextrender; if (!exec) return; vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); + + nextbin = vc4_first_bin_job(vc4); + nextrender = vc4_first_render_job(vc4); + + /* Only stop the perfmon if following jobs in the queue don't expect it + * to be enabled. + */ + if (exec->perfmon && !nextrender && + (!nextbin || nextbin->perfmon != exec->perfmon)) + vc4_perfmon_stop(vc4, exec->perfmon, true); + + /* If there's a render job waiting, start it. If this is not the case + * we may have to unblock the binner if it's been stalled because of + * perfmon (this can be checked by comparing the perfmon attached to + * the finished renderjob to the one attached to the next bin job: if + * they don't match, this means the binner is stalled and should be + * restarted). 
+ */ + if (nextrender) + vc4_submit_next_render_job(dev); + else if (nextbin && nextbin->perfmon != exec->perfmon) + vc4_submit_next_bin_job(dev); + if (exec->fence) { dma_fence_signal_locked(exec->fence); dma_fence_put(exec->fence); exec->fence = NULL; } - vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); schedule_work(&vc4->job_done_work); diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c new file mode 100644 index 000000000000..437e7a27f21d --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_perfmon.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Broadcom + */ + +/** + * DOC: VC4 V3D performance monitor module + * + * The V3D block provides 16 hardware counters which can count various events. + */ + +#include "vc4_drv.h" +#include "vc4_regs.h" + +#define VC4_PERFMONID_MIN 1 +#define VC4_PERFMONID_MAX U32_MAX + +void vc4_perfmon_get(struct vc4_perfmon *perfmon) +{ + if (perfmon) + refcount_inc(&perfmon->refcnt); +} + +void vc4_perfmon_put(struct vc4_perfmon *perfmon) +{ + if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) + kfree(perfmon); +} + +void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon) +{ + unsigned int i; + u32 mask; + + if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon)) + return; + + for (i = 0; i < perfmon->ncounters; i++) + V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]); + + mask = GENMASK(perfmon->ncounters - 1, 0); + V3D_WRITE(V3D_PCTRC, mask); + V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask); + vc4->active_perfmon = perfmon; +} + +void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon, + bool capture) +{ + unsigned int i; + + if (WARN_ON_ONCE(!vc4->active_perfmon || + perfmon != vc4->active_perfmon)) + return; + + if (capture) { + for (i = 0; i < perfmon->ncounters; i++) + perfmon->counters[i] += V3D_READ(V3D_PCTR(i)); + } + + V3D_WRITE(V3D_PCTRE, 0); + vc4->active_perfmon = NULL; +} + +struct vc4_perfmon *vc4_perfmon_find(struct 
vc4_file *vc4file, int id) +{ + struct vc4_perfmon *perfmon; + + mutex_lock(&vc4file->perfmon.lock); + perfmon = idr_find(&vc4file->perfmon.idr, id); + vc4_perfmon_get(perfmon); + mutex_unlock(&vc4file->perfmon.lock); + + return perfmon; +} + +void vc4_perfmon_open_file(struct vc4_file *vc4file) +{ + mutex_init(&vc4file->perfmon.lock); + idr_init(&vc4file->perfmon.idr); +} + +static int vc4_perfmon_idr_del(int id, void *elem, void *data) +{ + struct vc4_perfmon *perfmon = elem; + + vc4_perfmon_put(perfmon); + + return 0; +} + +void vc4_perfmon_close_file(struct vc4_file *vc4file) +{ + mutex_lock(&vc4file->perfmon.lock); + idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); + idr_destroy(&vc4file->perfmon.idr); + mutex_unlock(&vc4file->perfmon.lock); +} + +int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_file *vc4file = file_priv->driver_priv; + struct drm_vc4_perfmon_create *req = data; + struct vc4_perfmon *perfmon; + unsigned int i; + int ret; + + /* Number of monitored counters cannot exceed HW limits. */ + if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS || + !req->ncounters) + return -EINVAL; + + /* Make sure all events are valid. 
*/ + for (i = 0; i < req->ncounters; i++) { + if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS) + return -EINVAL; + } + + perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)), + GFP_KERNEL); + if (!perfmon) + return -ENOMEM; + + for (i = 0; i < req->ncounters; i++) + perfmon->events[i] = req->events[i]; + + perfmon->ncounters = req->ncounters; + + refcount_set(&perfmon->refcnt, 1); + + mutex_lock(&vc4file->perfmon.lock); + ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN, + VC4_PERFMONID_MAX, GFP_KERNEL); + mutex_unlock(&vc4file->perfmon.lock); + + if (ret < 0) { + kfree(perfmon); + return ret; + } + + req->id = ret; + return 0; +} + +int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_file *vc4file = file_priv->driver_priv; + struct drm_vc4_perfmon_destroy *req = data; + struct vc4_perfmon *perfmon; + + mutex_lock(&vc4file->perfmon.lock); + perfmon = idr_remove(&vc4file->perfmon.idr, req->id); + mutex_unlock(&vc4file->perfmon.lock); + + if (!perfmon) + return -EINVAL; + + vc4_perfmon_put(perfmon); + return 0; +} + +int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_file *vc4file = file_priv->driver_priv; + struct drm_vc4_perfmon_get_values *req = data; + struct vc4_perfmon *perfmon; + int ret; + + mutex_lock(&vc4file->perfmon.lock); + perfmon = idr_find(&vc4file->perfmon.idr, req->id); + vc4_perfmon_get(perfmon); + mutex_unlock(&vc4file->perfmon.lock); + + if (!perfmon) + return -EINVAL; + + if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters, + perfmon->ncounters * sizeof(u64))) + ret = -EFAULT; + else + ret = 0; + + vc4_perfmon_put(perfmon); + return ret; +} diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 515f97997624..61ad955645a5 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -85,47 +85,46 @@ static const struct 
hvs_format { u32 drm; /* DRM_FORMAT_* */ u32 hvs; /* HVS_FORMAT_* */ u32 pixel_order; - bool has_alpha; } hvs_formats[] = { { .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true, + .pixel_order = HVS_PIXEL_ORDER_ARGB, }, { .drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, - .pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_ARGB, }, { .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565, - .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XRGB, }, { .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565, - .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XBGR, }, { .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, - .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_ABGR, }, { .drm = DRM_FORMAT_RGB888, .hvs = HVS_PIXEL_FORMAT_RGB888, - .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XRGB, }, { .drm = DRM_FORMAT_BGR888, .hvs = HVS_PIXEL_FORMAT_RGB888, - .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, + .pixel_order = HVS_PIXEL_ORDER_XBGR, }, { .drm = DRM_FORMAT_YUV422, @@ -622,7 +621,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 2: Source Image Size, Alpha Mode */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - 
VC4_SET_FIELD(format->has_alpha ? + VC4_SET_FIELD(fb->format->has_alpha ? SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 55677bd50f66..b9749cb24063 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -122,38 +122,9 @@ #define V3D_VPMBASE 0x00504 #define V3D_PCTRC 0x00670 #define V3D_PCTRE 0x00674 -#define V3D_PCTR0 0x00680 -#define V3D_PCTRS0 0x00684 -#define V3D_PCTR1 0x00688 -#define V3D_PCTRS1 0x0068c -#define V3D_PCTR2 0x00690 -#define V3D_PCTRS2 0x00694 -#define V3D_PCTR3 0x00698 -#define V3D_PCTRS3 0x0069c -#define V3D_PCTR4 0x006a0 -#define V3D_PCTRS4 0x006a4 -#define V3D_PCTR5 0x006a8 -#define V3D_PCTRS5 0x006ac -#define V3D_PCTR6 0x006b0 -#define V3D_PCTRS6 0x006b4 -#define V3D_PCTR7 0x006b8 -#define V3D_PCTRS7 0x006bc -#define V3D_PCTR8 0x006c0 -#define V3D_PCTRS8 0x006c4 -#define V3D_PCTR9 0x006c8 -#define V3D_PCTRS9 0x006cc -#define V3D_PCTR10 0x006d0 -#define V3D_PCTRS10 0x006d4 -#define V3D_PCTR11 0x006d8 -#define V3D_PCTRS11 0x006dc -#define V3D_PCTR12 0x006e0 -#define V3D_PCTRS12 0x006e4 -#define V3D_PCTR13 0x006e8 -#define V3D_PCTRS13 0x006ec -#define V3D_PCTR14 0x006f0 -#define V3D_PCTRS14 0x006f4 -#define V3D_PCTR15 0x006f8 -#define V3D_PCTRS15 0x006fc +# define V3D_PCTRE_EN BIT(31) +#define V3D_PCTR(x) (0x00680 + ((x) * 8)) +#define V3D_PCTRS(x) (0x00684 + ((x) * 8)) #define V3D_DBGE 0x00f00 #define V3D_FDBGO 0x00f04 #define V3D_FDBGB 0x00f08 diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 493f392b3a0a..bfc2fa73d2ae 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -68,38 +68,38 @@ static const struct { REGDEF(V3D_VPMBASE), REGDEF(V3D_PCTRC), REGDEF(V3D_PCTRE), - REGDEF(V3D_PCTR0), - REGDEF(V3D_PCTRS0), - REGDEF(V3D_PCTR1), - REGDEF(V3D_PCTRS1), - REGDEF(V3D_PCTR2), - REGDEF(V3D_PCTRS2), - REGDEF(V3D_PCTR3), - 
REGDEF(V3D_PCTRS3), - REGDEF(V3D_PCTR4), - REGDEF(V3D_PCTRS4), - REGDEF(V3D_PCTR5), - REGDEF(V3D_PCTRS5), - REGDEF(V3D_PCTR6), - REGDEF(V3D_PCTRS6), - REGDEF(V3D_PCTR7), - REGDEF(V3D_PCTRS7), - REGDEF(V3D_PCTR8), - REGDEF(V3D_PCTRS8), - REGDEF(V3D_PCTR9), - REGDEF(V3D_PCTRS9), - REGDEF(V3D_PCTR10), - REGDEF(V3D_PCTRS10), - REGDEF(V3D_PCTR11), - REGDEF(V3D_PCTRS11), - REGDEF(V3D_PCTR12), - REGDEF(V3D_PCTRS12), - REGDEF(V3D_PCTR13), - REGDEF(V3D_PCTRS13), - REGDEF(V3D_PCTR14), - REGDEF(V3D_PCTRS14), - REGDEF(V3D_PCTR15), - REGDEF(V3D_PCTRS15), + REGDEF(V3D_PCTR(0)), + REGDEF(V3D_PCTRS(0)), + REGDEF(V3D_PCTR(1)), + REGDEF(V3D_PCTRS(1)), + REGDEF(V3D_PCTR(2)), + REGDEF(V3D_PCTRS(2)), + REGDEF(V3D_PCTR(3)), + REGDEF(V3D_PCTRS(3)), + REGDEF(V3D_PCTR(4)), + REGDEF(V3D_PCTRS(4)), + REGDEF(V3D_PCTR(5)), + REGDEF(V3D_PCTRS(5)), + REGDEF(V3D_PCTR(6)), + REGDEF(V3D_PCTRS(6)), + REGDEF(V3D_PCTR(7)), + REGDEF(V3D_PCTRS(7)), + REGDEF(V3D_PCTR(8)), + REGDEF(V3D_PCTRS(8)), + REGDEF(V3D_PCTR(9)), + REGDEF(V3D_PCTRS(9)), + REGDEF(V3D_PCTR(10)), + REGDEF(V3D_PCTRS(10)), + REGDEF(V3D_PCTR(11)), + REGDEF(V3D_PCTRS(11)), + REGDEF(V3D_PCTR(12)), + REGDEF(V3D_PCTRS(12)), + REGDEF(V3D_PCTR(13)), + REGDEF(V3D_PCTRS(13)), + REGDEF(V3D_PCTR(14)), + REGDEF(V3D_PCTRS(14)), + REGDEF(V3D_PCTR(15)), + REGDEF(V3D_PCTRS(15)), REGDEF(V3D_DBGE), REGDEF(V3D_FDBGO), REGDEF(V3D_FDBGB), |