From 8855e37f1210952d466e5f4196234bf0fd003c79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:10 -0300 Subject: [PATCH 01/16] drm/gem: Create shmem GEM object in a given mountpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit be431dfec976e553a08883e26d0d0cc2598a8dfa upstream Create a function `drm_gem_shmem_create_with_mnt()`, similar to `drm_gem_shmem_create()`, that has a mountpoint as a argument. This function will create a shmem GEM object in a given tmpfs mountpoint. This function will be useful for drivers that have a special mountpoint with flags enabled. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-7-mcanal@igalia.com --- drivers/gpu/drm/drm_gem_shmem_helper.c | 30 ++++++++++++++++++++++---- include/drm/drm_gem_shmem_helper.h | 3 +++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 53c003983ad183..8508060a1a95cc 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -49,7 +49,8 @@ static const struct drm_gem_object_funcs drm_gem_shmem_funcs = { }; static struct drm_gem_shmem_object * -__drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) +__drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private, + struct vfsmount *gemfs) { struct drm_gem_shmem_object *shmem; struct drm_gem_object *obj; @@ -76,7 +77,7 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) drm_gem_private_object_init(dev, obj, size); shmem->map_wc = false; /* dma-buf mappings use always writecombine */ } else { - ret = drm_gem_object_init(dev, obj, size); + ret = drm_gem_object_init_with_mnt(dev, obj, size, gemfs); } if (ret) { drm_gem_private_object_fini(obj); @@ -123,10 +124,31 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) */ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size) { - return __drm_gem_shmem_create(dev, size, false); + return __drm_gem_shmem_create(dev, size, false, NULL); } EXPORT_SYMBOL_GPL(drm_gem_shmem_create); +/** + * drm_gem_shmem_create_with_mnt - Allocate an object with the given size in a + * given mountpoint + * @dev: DRM device + * @size: Size of the object to allocate + * @gemfs: tmpfs mount where the GEM object will be created + * + * This function creates a shmem GEM object in a given tmpfs mountpoint. + * + * Returns: + * A struct drm_gem_shmem_object * on success or an ERR_PTR()-encoded negative + * error code on failure. + */ +struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, + size_t size, + struct vfsmount *gemfs) +{ + return __drm_gem_shmem_create(dev, size, false, gemfs); +} +EXPORT_SYMBOL_GPL(drm_gem_shmem_create_with_mnt); + /** * drm_gem_shmem_free - Free resources associated with a shmem GEM object * @shmem: shmem GEM object to free @@ -765,7 +787,7 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, size_t size = PAGE_ALIGN(attach->dmabuf->size); struct drm_gem_shmem_object *shmem; - shmem = __drm_gem_shmem_create(dev, size, true); + shmem = __drm_gem_shmem_create(dev, size, true, NULL); if (IS_ERR(shmem)) return ERR_CAST(shmem); diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index efbc9f27312b53..d22e3fb53631ab 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -97,6 +97,9 @@ struct drm_gem_shmem_object { container_of(obj, struct drm_gem_shmem_object, base) struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t size); +struct drm_gem_shmem_object *drm_gem_shmem_create_with_mnt(struct drm_device *dev, + size_t size, + struct vfsmount *gemfs); void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_put_pages(struct drm_gem_shmem_object *shmem); From 6ab5966ccfd1b5dda9398198b70a6bf574ec4e6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:08 -0300 Subject: [PATCH 02/16] drm/gem: Create a drm_gem_object_init_with_mnt() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0992b2541e1cd9580c2e70fab7a78558de054bae upstream For some applications, such as applications that uses huge pages, we might want to have a different mountpoint, for which we pass mount flags that better match our usecase. Therefore, create a new function `drm_gem_object_init_with_mnt()` that allow us to define the tmpfs mountpoint where the GEM object will be created. If this parameter is NULL, then we fallback to `shmem_file_setup()`. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-5-mcanal@igalia.com --- drivers/gpu/drm/drm_gem.c | 34 ++++++++++++++++++++++++++++++---- include/drm/drm_gem.h | 3 +++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 149b8e25da5bbf..ee811764c3df4b 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -114,22 +114,32 @@ drm_gem_init(struct drm_device *dev) } /** - * drm_gem_object_init - initialize an allocated shmem-backed GEM object + * drm_gem_object_init_with_mnt - initialize an allocated shmem-backed GEM + * object in a given shmfs mountpoint + * * @dev: drm_device the object should be initialized for * @obj: drm_gem_object to initialize * @size: object size + * @gemfs: tmpfs mount where the GEM object will be created. If NULL, use + * the usual tmpfs mountpoint (`shm_mnt`). * * Initialize an already allocated GEM object of the specified size with * shmfs backing store. */ -int drm_gem_object_init(struct drm_device *dev, - struct drm_gem_object *obj, size_t size) +int drm_gem_object_init_with_mnt(struct drm_device *dev, + struct drm_gem_object *obj, size_t size, + struct vfsmount *gemfs) { struct file *filp; drm_gem_private_object_init(dev, obj, size); - filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); + if (gemfs) + filp = shmem_file_setup_with_mnt(gemfs, "drm mm object", size, + VM_NORESERVE); + else + filp = shmem_file_setup("drm mm object", size, VM_NORESERVE); + if (IS_ERR(filp)) return PTR_ERR(filp); @@ -137,6 +147,22 @@ int drm_gem_object_init(struct drm_device *dev, return 0; } +EXPORT_SYMBOL(drm_gem_object_init_with_mnt); + +/** + * drm_gem_object_init - initialize an allocated shmem-backed GEM object + * @dev: drm_device the object should be initialized for + * @obj: drm_gem_object to initialize + * @size: object size + * + * Initialize an already allocated GEM object of the specified size with + * shmfs backing store. + */ +int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, + size_t size) +{ + return drm_gem_object_init_with_mnt(dev, obj, size, NULL); +} EXPORT_SYMBOL(drm_gem_object_init); /** diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index d8b86df2ec0dab..5b8b1b059d32c4 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -473,6 +473,9 @@ void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); +int drm_gem_object_init_with_mnt(struct drm_device *dev, + struct drm_gem_object *obj, size_t size, + struct vfsmount *gemfs); void drm_gem_private_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); void drm_gem_private_object_fini(struct drm_gem_object *obj); From 0a693c062e45f963f549d90d96549b8a152d0549 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Mon, 23 Sep 2024 17:19:59 +0200 Subject: [PATCH 03/16] drm/v3d: Use v3d_perfmon_find() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f2a4bcb2532881655ee546305f2a4fc6e1399a3e usptream Replace the open-coded v3d_perfmon_find() with the real thing. Signed-off-by: Christian Gmeiner Reviewed-by: Maíra Canal Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20240923152000.185980-1-christian.gmeiner@gmail.com --- drivers/gpu/drm/v3d/v3d_perfmon.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 6ee56cbd3f1bfc..924814cab46a17 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -409,11 +409,7 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, if (req->pad != 0) return -EINVAL; - mutex_lock(&v3d_priv->perfmon.lock); - perfmon = idr_find(&v3d_priv->perfmon.idr, req->id); - v3d_perfmon_get(perfmon); - mutex_unlock(&v3d_priv->perfmon.lock); - + perfmon = v3d_perfmon_find(v3d_priv, req->id); if (!perfmon) return -EINVAL; From bebba5e7ba14d22922f99f85d29feaf8c6c13404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:07 -0300 Subject: [PATCH 04/16] drm/v3d: Fix return if scheduler initialization fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 56cf76ed784fe5347c9305f7f0f3d0f9cb8d8a3b upstream If the scheduler initialization fails, GEM initialization must fail as well. Therefore, if `v3d_sched_init()` fails, free the DMA memory allocated and return the error value in `v3d_gem_init()`. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-4-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index d01181c0882aaf..e1a04c0bef2997 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -294,8 +294,9 @@ v3d_gem_init(struct drm_device *dev) ret = v3d_sched_init(v3d); if (ret) { drm_mm_takedown(&v3d->mm); - dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt, + dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt, v3d->pt_paddr); + return ret; } return 0; From 8f8f69980f908bda8ceb75363827554763833205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:09 -0300 Subject: [PATCH 05/16] drm/v3d: Introduce gemfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit eb8d395f68421449c6201d3019f51011d034f00e upstream Create a separate "tmpfs" kernel mount for V3D. This will allow us to move away from the shmemfs `shm_mnt` and gives the flexibility to do things like set our own mount options. Here, the interest is to use "huge=", which should allow us to enable the use of THP for our shmem-backed objects. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-6-mcanal@igalia.com --- drivers/gpu/drm/v3d/Makefile | 3 ++- drivers/gpu/drm/v3d/v3d_drv.h | 9 +++++++ drivers/gpu/drm/v3d/v3d_gem.c | 3 +++ drivers/gpu/drm/v3d/v3d_gemfs.c | 46 +++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/v3d/v3d_gemfs.c diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile index b7d673f1153bef..fcf710926057b3 100644 --- a/drivers/gpu/drm/v3d/Makefile +++ b/drivers/gpu/drm/v3d/Makefile @@ -13,7 +13,8 @@ v3d-y := \ v3d_trace_points.o \ v3d_sched.o \ v3d_sysfs.o \ - v3d_submit.o + v3d_submit.o \ + v3d_gemfs.o v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 0650859b97d319..10dde66d12924d 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -143,6 +143,11 @@ struct v3d_dev { struct drm_mm mm; spinlock_t mm_lock; + /* + * tmpfs instance used for shmem backed objects + */ + struct vfsmount *gemfs; + struct work_struct overflow_mem_work; struct v3d_bin_job *bin_job; @@ -540,6 +545,10 @@ void v3d_reset(struct v3d_dev *v3d); void v3d_invalidate_caches(struct v3d_dev *v3d); void v3d_clean_caches(struct v3d_dev *v3d); +/* v3d_gemfs.c */ +void v3d_gemfs_init(struct v3d_dev *v3d); +void v3d_gemfs_fini(struct v3d_dev *v3d); + /* v3d_submit.c */ void v3d_job_cleanup(struct v3d_job *job); void v3d_job_put(struct v3d_job *job); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index e1a04c0bef2997..6b1cbd4be54eae 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -291,6 +291,8 @@ v3d_gem_init(struct drm_device *dev) v3d_init_hw_state(v3d); v3d_mmu_set_page_table(v3d); + v3d_gemfs_init(v3d); + ret = v3d_sched_init(v3d); if (ret) { drm_mm_takedown(&v3d->mm); @@ -308,6 +310,7 @@ v3d_gem_destroy(struct drm_device *dev) struct v3d_dev *v3d = to_v3d_dev(dev); v3d_sched_fini(v3d); + v3d_gemfs_fini(v3d); /* Waiting for jobs to finish would need to be done before * unregistering V3D. diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c new file mode 100644 index 00000000000000..31cf5bd11e393f --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_gemfs.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2024 Raspberry Pi */ + +#include +#include + +#include "v3d_drv.h" + +void v3d_gemfs_init(struct v3d_dev *v3d) +{ + char huge_opt[] = "huge=within_size"; + struct file_system_type *type; + struct vfsmount *gemfs; + + /* + * By creating our own shmemfs mountpoint, we can pass in + * mount flags that better match our usecase. However, we + * only do so on platforms which benefit from it. + */ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + goto err; + + type = get_fs_type("tmpfs"); + if (!type) + goto err; + + gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt); + if (IS_ERR(gemfs)) + goto err; + + v3d->gemfs = gemfs; + drm_info(&v3d->drm, "Using Transparent Hugepages\n"); + + return; + +err: + v3d->gemfs = NULL; + drm_notice(&v3d->drm, + "Transparent Hugepage support is recommended for optimal performance on this platform!\n"); +} + +void v3d_gemfs_fini(struct v3d_dev *v3d) +{ + if (v3d->gemfs) + kern_unmount(v3d->gemfs); +} From 2a72584cf1b92d3fe9f24187ebc2062fe85eebf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:11 -0300 Subject: [PATCH 06/16] drm/v3d: Reduce the alignment of the node allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8dd6074d4719644b676adf9f07d71292bee28e3a upstream Currently, we are using an alignment of 128 kB to insert a node, which ends up wasting memory as we perform plenty of small BOs allocations (<= 4 kB). We require that allocations are aligned to 128Kb so for any allocation smaller than that, we are wasting the difference. This implies that we cannot effectively use the whole 4 GB address space available for the GPU in the RPi 4. Currently, we can allocate up to 32000 BOs of 4 kB (~140 MB) and 3000 BOs of 400 kB (~1,3 GB). This can be quite limiting for applications that have a high memory requirement, such as vkoverhead [1]. By reducing the page alignment to 4 kB, we can allocate up to 1000000 BOs of 4 kB (~4 GB) and 10000 BOs of 400 kB (~4 GB). Moreover, by performing benchmarks, we were able to attest that reducing the page alignment to 4 kB can provide a general performance improvement in OpenGL applications (e.g. glmark2). Therefore, this patch reduces the alignment of the node allocation to 4 kB, which will allow RPi users to explore the whole 4GB virtual address space provided by the hardware. Also, this patch allow users to fully run vkoverhead in the RPi 4/5, solving the issue reported in [1]. [1] https://github.com/zmike/vkoverhead/issues/14 Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-8-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_bo.c | 2 +- drivers/gpu/drm/v3d/v3d_drv.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index ebe52bef4ffb8d..0d65bdc883243c 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -123,7 +123,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) */ ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, obj->size >> V3D_MMU_PAGE_SHIFT, - GMP_GRANULARITY >> V3D_MMU_PAGE_SHIFT, 0, 0); + SZ_4K >> V3D_MMU_PAGE_SHIFT, 0, 0); spin_unlock(&v3d->mm_lock); if (ret) return ret; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 10dde66d12924d..c1a210fbe5f3f7 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -19,8 +19,6 @@ struct clk; struct platform_device; struct reset_control; -#define GMP_GRANULARITY (128 * 1024) - #define V3D_MMU_PAGE_SHIFT 12 #define V3D_MAX_QUEUES (V3D_CPU + 1) From 5aa3a8e0deed637233edaa7a93c83cb08877d473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:12 -0300 Subject: [PATCH 07/16] drm/v3d: Support Big/Super Pages when writing out PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e4c17720262f394f6320a2b6e17a128bfdabb37c upstream The V3D MMU also supports 64KB and 1MB pages, called big and super pages, respectively. In order to set a 64KB page or 1MB page in the MMU, we need to make sure that page table entries for all 4KB pages within a big/super page must be correctly configured. In order to create a big/super page, we need a contiguous memory region. That's why we use a separate mountpoint with THP enabled. In order to place the page table entries in the MMU, we iterate over the 16 4KB pages (for big pages) or 256 4KB pages (for super pages) and insert the PTE. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-9-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.h | 1 + drivers/gpu/drm/v3d/v3d_mmu.c | 54 ++++++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index c1a210fbe5f3f7..ad552bd99fd556 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -20,6 +20,7 @@ struct platform_device; struct reset_control; #define V3D_MMU_PAGE_SHIFT 12 +#define V3D_PAGE_FACTOR (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT) #define V3D_MAX_QUEUES (V3D_CPU + 1) diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 5bb7821c0243c6..0f564fd7160c13 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -25,9 +25,16 @@ * superpage bit set. */ #define V3D_PTE_SUPERPAGE BIT(31) +#define V3D_PTE_BIGPAGE BIT(30) #define V3D_PTE_WRITEABLE BIT(29) #define V3D_PTE_VALID BIT(28) +static bool v3d_mmu_is_aligned(u32 page, u32 page_address, size_t alignment) +{ + return IS_ALIGNED(page, alignment >> V3D_MMU_PAGE_SHIFT) && + IS_ALIGNED(page_address, alignment >> V3D_MMU_PAGE_SHIFT); +} + int v3d_mmu_flush_all(struct v3d_dev *v3d) { int ret; @@ -78,19 +85,40 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo) struct drm_gem_shmem_object *shmem_obj = &bo->base; struct v3d_dev *v3d = to_v3d_dev(shmem_obj->base.dev); u32 page = bo->node.start; - u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; - struct sg_dma_page_iter dma_iter; - - for_each_sgtable_dma_page(shmem_obj->sgt, &dma_iter, 0) { - dma_addr_t dma_addr = sg_page_iter_dma_address(&dma_iter); - u32 page_address = dma_addr >> V3D_MMU_PAGE_SHIFT; - u32 pte = page_prot | page_address; - u32 i; - - BUG_ON(page_address + (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT) >= - BIT(24)); - for (i = 0; i < PAGE_SIZE >> V3D_MMU_PAGE_SHIFT; i++) - v3d->pt[page++] = pte + i; + struct scatterlist *sgl; + unsigned int count; + + for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, count) { + dma_addr_t dma_addr = sg_dma_address(sgl); + u32 pfn = dma_addr >> V3D_MMU_PAGE_SHIFT; + unsigned int len = sg_dma_len(sgl); + + while (len > 0) { + u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; + u32 page_address = page_prot | pfn; + unsigned int i, page_size; + + BUG_ON(pfn + V3D_PAGE_FACTOR >= BIT(24)); + + if (len >= SZ_1M && + v3d_mmu_is_aligned(page, page_address, SZ_1M)) { + page_size = SZ_1M; + page_address |= V3D_PTE_SUPERPAGE; + } else if (len >= SZ_64K && + v3d_mmu_is_aligned(page, page_address, SZ_64K)) { + page_size = SZ_64K; + page_address |= V3D_PTE_BIGPAGE; + } else { + page_size = SZ_4K; + } + + for (i = 0; i < page_size >> V3D_MMU_PAGE_SHIFT; i++) { + v3d->pt[page++] = page_address + i; + pfn++; + } + + len -= page_size; + } } WARN_ON_ONCE(page - bo->node.start != From db8cc375b9d818faec24a8da229f29899cce0dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:13 -0300 Subject: [PATCH 08/16] drm/v3d: Use gemfs/THP in BO creation if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 20d69e8905fc36818ab585cb50f6de48fb8f6de3 upstream Although Big/Super Pages could appear naturally, it would be quite hard to have 1MB or 64KB allocated contiguously naturally. Therefore, we can force the creation of large pages allocated contiguously by using a mountpoint with "huge=within_size" enabled. Therefore, as V3D has a mountpoint with "huge=within_size" (if user has THP enabled), use this mountpoint for BO creation if available. This will allow us to create large pages allocated contiguously and make use of Big/Super Pages. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-10-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_bo.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 0d65bdc883243c..7055f7c7bcfe57 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -107,6 +107,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) struct v3d_dev *v3d = to_v3d_dev(obj->dev); struct v3d_bo *bo = to_v3d_bo(obj); struct sg_table *sgt; + u64 align; int ret; /* So far we pin the BO in the MMU for its lifetime, so use @@ -116,6 +117,15 @@ v3d_bo_create_finish(struct drm_gem_object *obj) if (IS_ERR(sgt)) return PTR_ERR(sgt); + if (!v3d->gemfs) + align = SZ_4K; + else if (obj->size >= SZ_1M) + align = SZ_1M; + else if (obj->size >= SZ_64K) + align = SZ_64K; + else + align = SZ_4K; + spin_lock(&v3d->mm_lock); /* Allocate the object's space in the GPU's page tables. * Inserting PTEs will happen later, but the offset is for the @@ -123,7 +133,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj) */ ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, obj->size >> V3D_MMU_PAGE_SHIFT, - SZ_4K >> V3D_MMU_PAGE_SHIFT, 0, 0); + align >> V3D_MMU_PAGE_SHIFT, 0, 0); spin_unlock(&v3d->mm_lock); if (ret) return ret; @@ -143,10 +153,17 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, size_t unaligned_size) { struct drm_gem_shmem_object *shmem_obj; + struct v3d_dev *v3d = to_v3d_dev(dev); struct v3d_bo *bo; int ret; - shmem_obj = drm_gem_shmem_create(dev, unaligned_size); + /* Let the user opt out of allocating the BOs with THP */ + if (v3d->gemfs) + shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size, + v3d->gemfs); + else + shmem_obj = drm_gem_shmem_create(dev, unaligned_size); + if (IS_ERR(shmem_obj)) return ERR_CAST(shmem_obj); bo = to_v3d_bo(&shmem_obj->base); From 3e71251a8ddeff2ae8c17b3d526cadee122e64cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:14 -0300 Subject: [PATCH 09/16] drm/v3d: Add modparam for turning off Big/Super Pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0df4a13ca8bed29cbc362c99b958fbb8b27b1675 upstream Add a modparam for turning off Big/Super Pages to make sure that if an user doesn't want Big/Super Pages enabled, it can disabled it by setting the modparam to false. Signed-off-by: Maíra Canal Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-11-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 7 +++++++ drivers/gpu/drm/v3d/v3d_drv.h | 1 + drivers/gpu/drm/v3d/v3d_gemfs.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 46e71f733de552..bab6bccc58424b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -40,6 +40,13 @@ #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 +/* Only expose the `super_pages` modparam if THP is enabled. */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +bool super_pages = true; +module_param_named(super_pages, super_pages, bool, 0400); +MODULE_PARM_DESC(super_pages, "Enable/Disable Super Pages support."); +#endif + static int v3d_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index ad552bd99fd556..5629a22780be00 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -545,6 +545,7 @@ void v3d_invalidate_caches(struct v3d_dev *v3d); void v3d_clean_caches(struct v3d_dev *v3d); /* v3d_gemfs.c */ +extern bool super_pages; void v3d_gemfs_init(struct v3d_dev *v3d); void v3d_gemfs_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c index 31cf5bd11e393f..4c5e18590a5cf1 100644 --- a/drivers/gpu/drm/v3d/v3d_gemfs.c +++ b/drivers/gpu/drm/v3d/v3d_gemfs.c @@ -20,6 +20,10 @@ void v3d_gemfs_init(struct v3d_dev *v3d) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) goto err; + /* The user doesn't want to enable Super Pages */ + if (!super_pages) + goto err; + type = get_fs_type("tmpfs"); if (!type) goto err; From 9ae3edc79650736a5ec3695207aae9d2535e7da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 23 Sep 2024 10:55:15 -0300 Subject: [PATCH 10/16] drm/v3d: Expose Super Pages capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9f8e1c93a0d459463819d8bd222196b2655c279f upstream Add a new V3D parameter to expose the support of Super Pages to userspace. The userspace might want to know this information to apply optimizations that are specific to kernels with Super Pages enabled. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240923141348.2422499-12-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 3 +++ include/uapi/drm/v3d_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index bab6bccc58424b..f94ec1e56d9b7b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -108,6 +108,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_MAX_PERF_COUNTERS: args->value = v3d->perfmon_info.max_counters; return 0; + case DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES: + args->value = !!v3d->gemfs; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 87fc5bb0a61e21..2376c73abca1ef 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -290,6 +290,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, DRM_V3D_PARAM_MAX_PERF_COUNTERS, + DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, }; struct drm_v3d_get_param { From d15e058957c26f3fa82d65f555a7225af07f8b7e Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Tue, 29 Oct 2024 12:24:29 +0100 Subject: [PATCH 11/16] drm/v3d: Drop allocation of object without mountpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d28292aa48d1917db39fac6bcdaf991ba0ed279f upstream Function drm_gem_shmem_create_with_mnt() creates an object without using the mountpoint if gemfs is NULL. Drop the else branch calling drm_gem_shmem_create(). Signed-off-by: Matthias Brugger Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20241029-v3d-v2-1-c0d3dd328d1b@gmail.com --- drivers/gpu/drm/v3d/v3d_bo.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 7055f7c7bcfe57..73ab7dd31b17b2 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -157,13 +157,8 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, struct v3d_bo *bo; int ret; - /* Let the user opt out of allocating the BOs with THP */ - if (v3d->gemfs) - shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size, - v3d->gemfs); - else - shmem_obj = drm_gem_shmem_create(dev, unaligned_size); - + shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size, + v3d->gemfs); if (IS_ERR(shmem_obj)) return ERR_CAST(shmem_obj); bo = to_v3d_bo(&shmem_obj->base); From 50b7415b6b5fe172962f0daa68d3d7313f60cf1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 6 Nov 2024 09:16:51 -0300 Subject: [PATCH 12/16] drm/v3d: Fix performance counter source settings on V3D 7.x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e987e22e9229d70c2083a91cc61269b2c4924955 upstream When the new register addresses were introduced for V3D 7.x, we added new masks for performance counter sources on V3D 7.x. Nevertheless, we never apply these new masks when setting the sources. Fix the performance counter source settings on V3D 7.x by introducing a new macro, `V3D_SET_FIELD_VER`, which allows fields setting to vary by version. Using this macro, we can provide different values for source mask based on the V3D version, ensuring that sources are correctly configure on V3D 7.x. Fixes: 0ad5bc1ce463 ("drm/v3d: fix up register addresses for V3D 7.x") Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Reviewed-by: Christian Gmeiner Link: https://patchwork.freedesktop.org/patch/msgid/20241106121736.5707-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_debugfs.c | 4 ++-- drivers/gpu/drm/v3d/v3d_perfmon.c | 15 ++++++++------- drivers/gpu/drm/v3d/v3d_regs.h | 29 +++++++++++++++++------------ 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 19e3ee7ac897fe..76816f2551c100 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -237,8 +237,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) if (v3d->ver >= 40) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, - V3D_SET_FIELD(cycle_count_reg, - V3D_PCTR_S0)); + V3D_SET_FIELD_VER(cycle_count_reg, + V3D_PCTR_S0, v3d->ver)); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1); } else { diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 924814cab46a17..ecf06e8e9fbccb 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -240,17 +240,18 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) for (i = 0; i < ncounters; i++) { u32 source = i / 4; - u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0); + u32 channel = V3D_SET_FIELD_VER(perfmon->counters[i], V3D_PCTR_S0, + v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S1); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S1, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S2); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S2, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S3); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S3, v3d->ver); V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); } diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 1b1a62ad95852b..6da3c69082bd6d 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -15,6 +15,14 @@ fieldval & field##_MASK; \ }) +#define V3D_SET_FIELD_VER(value, field, ver) \ + ({ \ + typeof(ver) _ver = (ver); \ + u32 fieldval = (value) << field##_SHIFT(_ver); \ + WARN_ON((fieldval & ~field##_MASK(_ver)) != 0); \ + fieldval & field##_MASK(_ver); \ + }) + #define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ field##_SHIFT) @@ -354,18 +362,15 @@ #define V3D_V4_PCTR_0_SRC_28_31 0x0067c #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ 4 * (x)) -# define V3D_PCTR_S0_MASK V3D_MASK(6, 0) -# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0) -# define V3D_PCTR_S0_SHIFT 0 -# define V3D_PCTR_S1_MASK V3D_MASK(14, 8) -# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8) -# define V3D_PCTR_S1_SHIFT 8 -# define V3D_PCTR_S2_MASK V3D_MASK(22, 16) -# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16) -# define V3D_PCTR_S2_SHIFT 16 -# define V3D_PCTR_S3_MASK V3D_MASK(30, 24) -# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24) -# define V3D_PCTR_S3_SHIFT 24 +# define V3D_PCTR_S0_MASK(ver) (((ver) >= 71) ? V3D_MASK(7, 0) : V3D_MASK(6, 0)) +# define V3D_PCTR_S0_SHIFT(ver) 0 +# define V3D_PCTR_S1_MASK(ver) (((ver) >= 71) ? V3D_MASK(15, 8) : V3D_MASK(14, 8)) +# define V3D_PCTR_S1_SHIFT(ver) 8 +# define V3D_PCTR_S2_MASK(ver) (((ver) >= 71) ? V3D_MASK(23, 16) : V3D_MASK(22, 16)) +# define V3D_PCTR_S2_SHIFT(ver) 16 +# define V3D_PCTR_S3_MASK(ver) (((ver) >= 71) ? V3D_MASK(31, 24) : V3D_MASK(30, 24)) +# define V3D_PCTR_S3_SHIFT(ver) 24 + #define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32) /* Output values of the counters. */ From 09774326ad545000b536b300c01e9352fcb6c6c7 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Mon, 18 Nov 2024 23:19:47 +0100 Subject: [PATCH 13/16] drm/v3d: Stop active perfmon if it is being destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 21f1435b1e6b012a07c42f36b206d2b66fc8f13b upstream If the active performance monitor (`v3d->active_perfmon`) is being destroyed, stop it first. Currently, the active perfmon is not stopped during destruction, leaving the `v3d->active_perfmon` pointer stale. This can lead to undefined behavior and instability. This patch ensures that the active perfmon is stopped before being destroyed, aligning with the behavior introduced in commit 7d1fd3638ee3 ("drm/v3d: Stop the active perfmon before being destroyed"). Cc: stable@vger.kernel.org # v5.15+ Fixes: 26a4dc29b74a ("drm/v3d: Expose performance counters to userspace") Signed-off-by: Christian Gmeiner Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20241118221948.1758130-1-christian.gmeiner@gmail.com --- drivers/gpu/drm/v3d/v3d_perfmon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index ecf06e8e9fbccb..c49abb90954d49 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -384,6 +384,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, { struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_perfmon_destroy *req = data; + struct v3d_dev *v3d = v3d_priv->v3d; struct v3d_perfmon *perfmon; mutex_lock(&v3d_priv->perfmon.lock); @@ -393,6 +394,10 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, if (!perfmon) return -EINVAL; + /* If the active perfmon is being destroyed, stop it first */ + if (perfmon == v3d->active_perfmon) + v3d_perfmon_stop(v3d, perfmon, false); + v3d_perfmon_put(perfmon); return 0; From 164537efe63d7a259688d24b040956b430792de8 Mon Sep 17 00:00:00 2001 From: Christian Gmeiner Date: Mon, 2 Dec 2024 15:06:13 +0100 Subject: [PATCH 14/16] drm/v3d: Add DRM_IOCTL_V3D_PERFMON_SET_GLOBAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c6eabbab359c156669e10d5dec3e71e80ff09bd2 upstream Add a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, to allow configuration of a global performance monitor (perfmon). Use the global perfmon for all jobs to ensure consistent performance tracking across submissions. This feature is needed to implement a Perfetto datasources in user-space. Signed-off-by: Christian Gmeiner Reviewed-by: Maíra Canal Signed-off-by: Maíra Canal Link: https://patchwork.freedesktop.org/patch/msgid/20241202140615.74802-1-christian.gmeiner@gmail.com --- drivers/gpu/drm/v3d/v3d_drv.c | 1 + drivers/gpu/drm/v3d/v3d_drv.h | 8 +++++++ drivers/gpu/drm/v3d/v3d_perfmon.c | 37 +++++++++++++++++++++++++++++++ drivers/gpu/drm/v3d/v3d_sched.c | 14 +++++++++--- drivers/gpu/drm/v3d/v3d_submit.c | 10 +++++++++ include/uapi/drm/v3d_drm.h | 18 +++++++++++++++ 6 files changed, 85 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index f94ec1e56d9b7b..d520094b83e144 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -228,6 +228,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 5629a22780be00..5cde5cdfd73add 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -189,6 +189,12 @@ struct v3d_dev { u32 num_allocated; u32 pages_allocated; } bo_stats; + + /* To support a performance analysis tool in user space, we require + * a single, globally configured performance monitor (perfmon) for + * all jobs. + */ + struct v3d_perfmon *global_perfmon; }; static inline struct v3d_dev * @@ -600,6 +606,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index c49abb90954d49..3ebda2fa46fc47 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -313,6 +313,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) if (perfmon == v3d->active_perfmon) v3d_perfmon_stop(v3d, perfmon, false); + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -398,6 +401,9 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, if (perfmon == v3d->active_perfmon) v3d_perfmon_stop(v3d, perfmon, false); + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -457,3 +463,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, return 0; } + +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_set_global *req = data; + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_perfmon *perfmon; + + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) + return -EINVAL; + + perfmon = v3d_perfmon_find(v3d_priv, req->id); + if (!perfmon) + return -EINVAL; + + /* If the request is to clear the global performance monitor */ + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { + if (!v3d->global_perfmon) + return -EINVAL; + + xchg(&v3d->global_perfmon, NULL); + + return 0; + } + + if (cmpxchg(&v3d->global_perfmon, NULL, perfmon)) + return -EBUSY; + + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 4f935f1d50a943..6a50ed475d57a5 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -120,11 +120,19 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job) static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { - if (job->perfmon != v3d->active_perfmon) + struct v3d_perfmon *perfmon = v3d->global_perfmon; + + if (!perfmon) + perfmon = job->perfmon; + + if (perfmon == v3d->active_perfmon) + return; + + if (perfmon != v3d->active_perfmon) v3d_perfmon_stop(v3d, v3d->active_perfmon, true); - if (job->perfmon && v3d->active_perfmon != job->perfmon) - v3d_perfmon_start(v3d, job->perfmon); + if (perfmon && v3d->active_perfmon != perfmon) + v3d_perfmon_start(v3d, perfmon); } static void diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 284ed23d81956b..6bdbc2e5ec717c 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -1044,6 +1044,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + render->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); @@ -1259,6 +1264,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + job->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); if (!job->base.perfmon) { diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 2376c73abca1ef..dbbc404d2b3dd6 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -43,6 +43,7 @@ extern "C" { #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b #define DRM_V3D_PERFMON_GET_COUNTER 0x0c +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -61,6 +62,8 @@ extern "C" { #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ struct drm_v3d_perfmon_get_counter) +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ + struct drm_v3d_perfmon_set_global) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -766,6 +769,21 @@ struct drm_v3d_perfmon_get_counter { __u8 reserved[7]; }; +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 + +/** + * struct drm_v3d_perfmon_set_global - ioctl to define a global performance + * monitor + * + * The global performance monitor will be used for all jobs. If a global + * performance monitor is defined, jobs with a self-defined performance + * monitor won't be allowed. + */ +struct drm_v3d_perfmon_set_global { + __u32 flags; + __u32 id; +}; + #if defined(__cplusplus) } #endif From 6b9af45de1af6c4756a503d07b3d15d389363f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Fri, 6 Dec 2024 12:39:09 -0300 Subject: [PATCH 15/16] drm/v3d: Fix miscellaneous documentation errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4ee06e3b6120 upstream. This commit fixes several miscellaneous documentation errors. Mostly, delete/update comments that are outdated or are leftovers from past code changes. Apart from that, remove double-spaces in several comments. Signed-off-by: Maíra Canal Acked-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20241206153908.62429-1-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_bo.c | 4 ---- drivers/gpu/drm/v3d/v3d_mmu.c | 8 ++++---- drivers/gpu/drm/v3d/v3d_performance_counters.h | 12 +++++------- drivers/gpu/drm/v3d/v3d_sched.c | 12 ++++++------ drivers/gpu/drm/v3d/v3d_submit.c | 9 +++++---- 5 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 73ab7dd31b17b2..bb7815599435bf 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -13,10 +13,6 @@ * Display engines requiring physically contiguous allocations should * look into Mesa's "renderonly" support (as used by the Mesa pl111 * driver) for an example of how to integrate with V3D. - * - * Long term, we should support evicting pages from the MMU when under - * memory pressure (thus the v3d_bo_get_pages() refcounting), but - * that's not a high priority since our systems tend to not have swap. */ #include diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 0f564fd7160c13..a25d25a8ae617b 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -4,7 +4,7 @@ /** * DOC: Broadcom V3D MMU * - * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has * a single level of page tables for the V3D's 4GB address space to * map to AXI bus addresses, thus it could need up to 4MB of * physically contiguous memory to store the PTEs. @@ -15,14 +15,14 @@ * * To protect clients from each other, we should use the GMP to * quickly mask out (at 128kb granularity) what pages are available to - * each client. This is not yet implemented. + * each client. This is not yet implemented. */ #include "v3d_drv.h" #include "v3d_regs.h" -/* Note: All PTEs for the 1MB superpage must be filled with the - * superpage bit set. +/* Note: All PTEs for the 64KB bigpage or 1MB superpage must be filled + * with the bigpage/superpage bit set. */ #define V3D_PTE_SUPERPAGE BIT(31) #define V3D_PTE_BIGPAGE BIT(30) diff --git a/drivers/gpu/drm/v3d/v3d_performance_counters.h b/drivers/gpu/drm/v3d/v3d_performance_counters.h index d919a2fc94490b..2bc4cce0744a16 100644 --- a/drivers/gpu/drm/v3d/v3d_performance_counters.h +++ b/drivers/gpu/drm/v3d/v3d_performance_counters.h @@ -2,11 +2,12 @@ /* * Copyright (C) 2024 Raspberry Pi */ + #ifndef V3D_PERFORMANCE_COUNTERS_H #define V3D_PERFORMANCE_COUNTERS_H -/* Holds a description of a given performance counter. The index of performance - * counter is given by the array on v3d_performance_counter.h +/* Holds a description of a given performance counter. The index of + * performance counter is given by the array on `v3d_performance_counter.c`. */ struct v3d_perf_counter_desc { /* Category of the counter */ @@ -20,15 +21,12 @@ struct v3d_perf_counter_desc { }; struct v3d_perfmon_info { - /* - * Different revisions of V3D have different total number of + /* Different revisions of V3D have different total number of * performance counters. */ unsigned int max_counters; - /* - * Array of counters valid for the platform. - */ + /* Array of counters valid for the platform. */ const struct v3d_perf_counter_desc *counters; }; diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 6a50ed475d57a5..f17398e12401c7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -5,16 +5,16 @@ * DOC: Broadcom V3D scheduling * * The shared DRM GPU scheduler is used to coordinate submitting jobs - * to the hardware. Each DRM fd (roughly a client process) gets its - * own scheduler entity, which will process jobs in order. The GPU - * scheduler will round-robin between clients to submit the next job. + * to the hardware. Each DRM fd (roughly a client process) gets its + * own scheduler entity, which will process jobs in order. The GPU + * scheduler will schedule the clients with a FIFO scheduling algorithm. * * For simplicity, and in order to keep latency low for interactive * jobs when bulk background jobs are queued up, we submit a new job * to the HW only when it has completed the last one, instead of - * filling up the CT[01]Q FIFOs with jobs. Similarly, we use - * drm_sched_job_add_dependency() to manage the dependency between bin and - * render, instead of having the clients submit jobs using the HW's + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use + * `drm_sched_job_add_dependency()` to manage the dependency between bin + * and render, instead of having the clients submit jobs using the HW's * semaphores to interlock between them. */ diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 6bdbc2e5ec717c..c39de2d68fb7fd 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -52,10 +52,11 @@ v3d_clock_up_put(struct v3d_dev *v3d) } /* Takes the reservation lock on all the BOs being referenced, so that - * at queue submit time we can update the reservations. + * we can attach fences and update the reservations after pushing the job + * to the queue. * * We don't lock the RCL the tile alloc/state BOs, or overflow memory - * (all of which are on exec->unref_list). They're entirely private + * (all of which are on render->unref_list). They're entirely private * to v3d, so we don't attach dma-buf fences to them. */ static int @@ -96,11 +97,11 @@ v3d_lock_bo_reservations(struct v3d_job *job, * @bo_count: Number of GEM handles passed in * * The command validator needs to reference BOs by their index within - * the submitted job's BO list. This does the validation of the job's + * the submitted job's BO list. This does the validation of the job's * BO list and reference counting for the lifetime of the job. * * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at v3d_exec_cleanup() time. + * failure, because that will happen at `v3d_job_free()`. */ static int v3d_lookup_bos(struct drm_device *dev, From a740b53212de3623190b9cca67dd1680a822ef7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 13 Jan 2025 12:47:41 -0300 Subject: [PATCH 16/16] drm/v3d: Remove `v3d->cpu_job` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit dc4afc0de9654f88676d77094a38f9451d519011 upstream. CPU jobs, like Cache Clean jobs, execute synchronously once the DRM scheduler starts running them. Consequently, a global `v3d->cpu_job` variable is unnecessary, as everything is managed within the `v3d_cpu_job_run()` function. This commit removes the `v3d->cpu_job` pointer, as it is not needed. Signed-off-by: Maíra Canal Reviewed-by: Jose Maria Casanova Crespo Link: https://patchwork.freedesktop.org/patch/msgid/20250113154741.67520-2-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.h | 1 - drivers/gpu/drm/v3d/v3d_sched.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 5cde5cdfd73add..71733e4d19e79b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -153,7 +153,6 @@ struct v3d_dev { struct v3d_render_job *render_job; struct v3d_tfu_job *tfu_job; struct v3d_csd_job *csd_job; - struct v3d_cpu_job *cpu_job; struct v3d_queue_state queue[V3D_MAX_QUEUES]; diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index f17398e12401c7..230832c16cadec 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -656,8 +656,6 @@ v3d_cpu_job_run(struct drm_sched_job *sched_job) struct v3d_cpu_job *job = to_cpu_job(sched_job); struct v3d_dev *v3d = job->base.v3d; - v3d->cpu_job = job; - if (job->job_type >= ARRAY_SIZE(cpu_job_function)) { DRM_DEBUG_DRIVER("Unknown CPU job: %d\n", job->job_type); return NULL;