Commit ed29c26

mlankhorst authored and danvet committed
drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.
Instead of doing what we do currently, which will never work with
PROVE_LOCKING, do the same as AMD does, and something similar to the
relocation slowpath: when all locks are dropped, we acquire the pages
for pinning; when the locks are taken, we transfer those pages in
.get_pages() to the bo. As a final check before installing the fences,
we ensure that the mmu notifier was not called; if it was, we return
-EAGAIN to userspace to signal it has to start over.

Changes since v1:
- Unbinding is done in submit_init only. submit_begin() removed.
- MMU_NOTFIER -> MMU_NOTIFIER

Changes since v2:
- Make i915->mm.notifier a spinlock.

Changes since v3:
- Add WARN_ON if there are any page references left; they should have been 0.
- Return 0 on success in submit_init(), bug from spinlock conversion.
- Release pvec outside of notifier_lock (Thomas).

Changes since v4:
- Mention why we're clearing eb->[i + 1].vma in the code. (Thomas)
- Actually check all invalidations in eb_move_to_gpu. (Thomas)
- Do not wait when process is exiting to fix gem_ctx_persistence.userptr.

Changes since v5:
- Clarify why the check on PF_EXITING is (temporarily) required.

Changes since v6:
- Ensure userptr validity is checked in set_domain through a special path.

Signed-off-by: Maarten Lankhorst <[email protected]>
Acked-by: Dave Airlie <[email protected]>
[danvet: s/kfree/kvfree/ in i915_gem_object_userptr_drop_ref, which was
requested in the previous review round but got lost. The other open
questions around page refcounts are imo better discussed in a separate
series, with amdgpu folks involved.]
Reviewed-by: Thomas Hellström <[email protected]>
Signed-off-by: Daniel Vetter <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 20ee27b commit ed29c26
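
The heart of the new scheme is the kernel's mmu_interval_notifier API: pages are pinned while no locks are held, and a sequence number is re-checked under the notifier lock just before the fences are installed. Below is a minimal sketch of that acquire/retry flow, assuming the notifier is already registered for the userptr range. The struct and helper names are illustrative, not the actual i915_gem_object_userptr_submit_init()/_done() from this patch; mmu_interval_read_begin(), mmu_interval_read_retry(), pin_user_pages_fast() and unpin_user_pages() are the real kernel APIs.

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Illustrative stand-in for the object's userptr state. */
struct userptr_sketch {
	struct mmu_interval_notifier notifier;
	unsigned long notifier_seq;
	struct page **pvec;
	int npages;
};

/* Phase 1: with all locks dropped, acquire the pages for pinning. */
static int sketch_submit_init(struct userptr_sketch *up, unsigned long ptr)
{
	int pinned;

	up->notifier_seq = mmu_interval_read_begin(&up->notifier);

	pinned = pin_user_pages_fast(ptr, up->npages,
				     FOLL_WRITE | FOLL_LONGTERM, up->pvec);
	if (pinned < 0)
		return pinned;
	if (pinned < up->npages) {
		unpin_user_pages(up->pvec, pinned);
		return -EFAULT;
	}
	return 0;
}

/*
 * Phase 2: called under the notifier lock as a final check before the
 * fences are installed. A racing invalidation bumps the sequence, so
 * -EAGAIN tells userspace it has to start the submission over.
 */
static int sketch_submit_done(struct userptr_sketch *up)
{
	if (mmu_interval_read_retry(&up->notifier, up->notifier_seq))
		return -EAGAIN;
	return 0;
}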

File tree

8 files changed (+395, -584)


drivers/gpu/drm/i915/gem/i915_gem_domain.c

Lines changed: 16 additions & 2 deletions

@@ -533,14 +533,28 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out;
 
+	if (i915_gem_object_is_userptr(obj)) {
+		/*
+		 * Try to grab userptr pages, iris uses set_domain to check
+		 * userptr validity
+		 */
+		err = i915_gem_object_userptr_validate(obj);
+		if (!err)
+			err = i915_gem_object_wait(obj,
+						   I915_WAIT_INTERRUPTIBLE |
+						   I915_WAIT_PRIORITY |
+						   (write_domain ? I915_WAIT_ALL : 0),
+						   MAX_SCHEDULE_TIMEOUT);
+		goto out;
+	}
+
 	/*
 	 * Proxy objects do not control access to the backing storage, ergo
 	 * they cannot be used as a means to manipulate the cache domain
 	 * tracking for that backing storage. The proxy object is always
 	 * considered to be outside of any cache domain.
 	 */
-	if (i915_gem_object_is_proxy(obj) &&
-	    !i915_gem_object_is_userptr(obj)) {
+	if (i915_gem_object_is_proxy(obj)) {
 		err = -ENXIO;
 		goto out;
 	}
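
The comment in the hunk above notes that iris uses set_domain purely as a validity probe for userptr objects. For illustration, a userspace caller could look like this hedged sketch; userptr_still_valid() is a hypothetical helper, not code from the patch, while the ioctl number, struct and domain flag are the real i915 uAPI:

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* via libdrm's include path */

/* Returns 0 if the userptr range backing the object is still valid. */
static int userptr_still_valid(int fd, uint32_t handle)
{
	struct drm_i915_gem_set_domain sd = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_CPU,
		.write_domain = 0,
	};

	/* A failure here means the userptr pages could not be grabbed. */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
		return -errno;
	return 0;
}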

drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c

Lines changed: 88 additions & 13 deletions

@@ -53,14 +53,16 @@ enum {
 /* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
 #define __EXEC_OBJECT_HAS_PIN BIT(30)
 #define __EXEC_OBJECT_HAS_FENCE BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above + */
+#define __EXEC_OBJECT_USERPTR_INIT BIT(28)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(27)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(26)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC BIT(31)
 #define __EXEC_ENGINE_PINNED BIT(30)
-#define __EXEC_INTERNAL_FLAGS (~0u << 30)
+#define __EXEC_USERPTR_USED BIT(29)
+#define __EXEC_INTERNAL_FLAGS (~0u << 29)
 #define UPDATE PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -871,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		}
 
 		eb_add_vma(eb, i, batch, vma);
+
+		if (i915_gem_object_is_userptr(vma->obj)) {
+			err = i915_gem_object_userptr_submit_init(vma->obj);
+			if (err) {
+				if (i + 1 < eb->buffer_count) {
+					/*
+					 * Execbuffer code expects last vma entry to be NULL,
+					 * since we already initialized this entry,
+					 * set the next value to NULL or we mess up
+					 * cleanup handling.
+					 */
+					eb->vma[i + 1].vma = NULL;
+				}
+
+				return err;
+			}
+
+			eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
+			eb->args->flags |= __EXEC_USERPTR_USED;
+		}
 	}
 
 	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
@@ -972,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	}
 }
 
-static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i;
@@ -986,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 
 		eb_unreserve_vma(ev);
 
+		if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
+			ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
+			i915_gem_object_userptr_submit_fini(vma->obj);
+		}
+
 		if (final)
 			i915_vma_put(vma);
 	}
@@ -1923,6 +1950,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
 	return 0;
 }
 
+static int eb_reinit_userptr(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+	int ret;
+
+	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
+		return 0;
+
+	for (i = 0; i < count; i++) {
+		struct eb_vma *ev = &eb->vma[i];
+
+		if (!i915_gem_object_is_userptr(ev->vma->obj))
+			continue;
+
+		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
+		if (ret)
+			return ret;
+
+		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
+	}
+
+	return 0;
+}
+
 static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 					   struct i915_request *rq)
 {
@@ -1937,7 +1989,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 	}
 
 	/* We may process another execbuffer during the unlock... */
-	eb_release_vmas(eb, false);
+	eb_release_vmas(eb, false, true);
 	i915_gem_ww_ctx_fini(&eb->ww);
 
 	if (rq) {
@@ -1978,10 +2030,8 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 		err = 0;
 	}
 
-#ifdef CONFIG_MMU_NOTIFIER
 	if (!err)
-		flush_workqueue(eb->i915->mm.userptr_wq);
-#endif
+		err = eb_reinit_userptr(eb);
 
 err_relock:
 	i915_gem_ww_ctx_init(&eb->ww, true);
@@ -2043,7 +2093,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 
 err:
 	if (err == -EDEADLK) {
-		eb_release_vmas(eb, false);
+		eb_release_vmas(eb, false, false);
 		err = i915_gem_ww_ctx_backoff(&eb->ww);
 		if (!err)
 			goto repeat_validate;
@@ -2140,7 +2190,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
 
 err:
 	if (err == -EDEADLK) {
-		eb_release_vmas(eb, false);
+		eb_release_vmas(eb, false, false);
 		err = i915_gem_ww_ctx_backoff(&eb->ww);
 		if (!err)
 			goto retry;
@@ -2215,6 +2265,30 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 					    flags | __EXEC_OBJECT_NO_RESERVE);
 	}
 
+#ifdef CONFIG_MMU_NOTIFIER
+	if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
+		spin_lock(&eb->i915->mm.notifier_lock);
+
+		/*
+		 * count is always at least 1, otherwise __EXEC_USERPTR_USED
+		 * could not have been set
+		 */
+		for (i = 0; i < count; i++) {
+			struct eb_vma *ev = &eb->vma[i];
+			struct drm_i915_gem_object *obj = ev->vma->obj;
+
+			if (!i915_gem_object_is_userptr(obj))
+				continue;
+
+			err = i915_gem_object_userptr_submit_done(obj);
+			if (err)
+				break;
+		}
+
+		spin_unlock(&eb->i915->mm.notifier_lock);
+	}
+#endif
+
 	if (unlikely(err))
 		goto err_skip;
 
@@ -3359,7 +3433,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 	err = eb_lookup_vmas(&eb);
 	if (err) {
-		eb_release_vmas(&eb, true);
+		eb_release_vmas(&eb, true, true);
 		goto err_engine;
 	}
 
@@ -3431,6 +3505,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb, batch);
+
 err_request:
 	i915_request_get(eb.request);
 	err = eb_request_add(&eb, err);
@@ -3451,7 +3526,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_request_put(eb.request);
 
 err_vma:
-	eb_release_vmas(&eb, true);
+	eb_release_vmas(&eb, true, true);
 	if (eb.trampoline)
 		i915_vma_unpin(eb.trampoline);
 	WARN_ON(err == -EDEADLK);
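
A consequence of the eb_move_to_gpu() check above is that execbuffer can now fail with -EAGAIN when a userptr range is invalidated between pinning and fencing, and userspace is expected to restart the submission. A hedged sketch of a caller-side retry loop (submit_execbuf() is a hypothetical helper; the ioctl and struct are the real uAPI):

#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_execbuf(int fd, struct drm_i915_gem_execbuffer2 *eb)
{
	int ret;

	/* Restart on signal or on a userptr invalidation race. */
	do {
		ret = ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret ? -errno : 0;
}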

drivers/gpu/drm/i915/gem/i915_gem_object.h

Lines changed: 22 additions & 16 deletions

@@ -33,6 +33,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
 				       const void *data, resource_size_t size);
 
 extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+
 void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				     struct sg_table *pages,
 				     bool needs_clflush);
@@ -252,12 +253,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
 	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP);
 }
 
-static inline bool
-i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
-}
-
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
@@ -548,16 +543,6 @@ void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
 void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
 					      enum fb_op_origin origin);
 
-static inline bool
-i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
-{
-#ifdef CONFIG_MMU_NOTIFIER
-	return obj->userptr.mm;
-#else
-	return false;
-#endif
-}
-
 static inline void
 i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
 				  enum fb_op_origin origin)
@@ -578,4 +563,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
 
 bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);
 
+#ifdef CONFIG_MMU_NOTIFIER
+static inline bool
+i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
+{
+	return obj->userptr.notifier.mm;
+}
+
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
+int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
+void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
+int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
+#else
+static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }
+
+static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
+static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
+static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); }
+static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
+
+#endif
+
 #endif

drivers/gpu/drm/i915/gem/i915_gem_object_types.h

Lines changed: 6 additions & 4 deletions

@@ -7,6 +7,8 @@
 #ifndef __I915_GEM_OBJECT_TYPES_H__
 #define __I915_GEM_OBJECT_TYPES_H__
 
+#include <linux/mmu_notifier.h>
+
 #include <drm/drm_gem.h>
 #include <uapi/drm/i915_drm.h>
 
@@ -34,7 +36,6 @@ struct drm_i915_gem_object_ops {
 #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(2)
 #define I915_GEM_OBJECT_IS_PROXY BIT(3)
 #define I915_GEM_OBJECT_NO_MMAP BIT(4)
-#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(5)
 
 /* Interface between the GEM object and its backing storage.
  * get_pages() is called once prior to the use of the associated set
@@ -293,10 +294,11 @@ struct drm_i915_gem_object {
 #ifdef CONFIG_MMU_NOTIFIER
 	struct i915_gem_userptr {
 		uintptr_t ptr;
+		unsigned long notifier_seq;
 
-		struct i915_mm_struct *mm;
-		struct i915_mmu_object *mmu_object;
-		struct work_struct *work;
+		struct mmu_interval_notifier notifier;
+		struct page **pvec;
+		int page_ref;
 	} userptr;
 #endif
 
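
The new notifier and notifier_seq fields above plug into the mmu_interval_notifier machinery. Below is a sketch of the registration and of an invalidate callback that bumps the sequence; mmu_interval_notifier_insert(), mmu_interval_set_seq() and the ops struct are the real API, while the sketch_* names are illustrative and omit the driver-lock handling and unbind/wait work the real i915 callback performs:

#include <linux/mmu_notifier.h>
#include <linux/sched.h>

static bool sketch_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	/*
	 * Advance the sequence so a concurrent submit_done() sees the
	 * invalidation and returns -EAGAIN. The real code takes the
	 * driver's notifier lock around this.
	 */
	mmu_interval_set_seq(mni, cur_seq);
	return true;
}

static const struct mmu_interval_notifier_ops sketch_notifier_ops = {
	.invalidate = sketch_invalidate,
};

/* Register a notifier for [ptr, ptr + size) against the current mm. */
static int sketch_register(struct mmu_interval_notifier *notifier,
			   unsigned long ptr, unsigned long size)
{
	return mmu_interval_notifier_insert(notifier, current->mm,
					    ptr, size, &sketch_notifier_ops);
}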

drivers/gpu/drm/i915/gem/i915_gem_pages.c

Lines changed: 1 addition & 1 deletion

@@ -226,7 +226,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	 * get_pages backends we should be better able to handle the
 	 * cancellation of the async task in a more uniform manner.
 	 */
-	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+	if (!pages)
 		pages = ERR_PTR(-EINVAL);
 
 	if (!IS_ERR(pages))
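
For reference, the ERR_PTR() convention the hunk above relies on, in isolation: an error code is encoded into the pointer value itself, so a single return can carry either pages or a failure. A self-contained sketch, not i915 code:

#include <linux/err.h>
#include <linux/scatterlist.h>

static struct sg_table *pages_or_error(struct sg_table *pages)
{
	/* With the async-cancel path gone, missing pages are simply -EINVAL. */
	if (!pages)
		return ERR_PTR(-EINVAL);
	return pages;
}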
