slub: Fix kmem_cache_destroy() with SLAB_DESTROY_BY_RCU
[linux-2.6.git] / drivers / gpu / drm / i915 / i915_gem.c
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27
28 #include "drmP.h"
29 #include "drm.h"
30 #include "i915_drm.h"
31 #include "i915_drv.h"
32 #include <linux/swap.h>
33 #include <linux/pci.h>
34
35 #define I915_GEM_GPU_DOMAINS    (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
36
37 static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
38 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
39 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
40 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
41                                              int write);
42 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
43                                                      uint64_t offset,
44                                                      uint64_t size);
45 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
46 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
47 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
48                                            unsigned alignment);
49 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
50 static int i915_gem_evict_something(struct drm_device *dev);
51 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
52                                 struct drm_i915_gem_pwrite *args,
53                                 struct drm_file *file_priv);
54
55 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
56                      unsigned long end)
57 {
58         drm_i915_private_t *dev_priv = dev->dev_private;
59
60         if (start >= end ||
61             (start & (PAGE_SIZE - 1)) != 0 ||
62             (end & (PAGE_SIZE - 1)) != 0) {
63                 return -EINVAL;
64         }
65
66         drm_mm_init(&dev_priv->mm.gtt_space, start,
67                     end - start);
68
69         dev->gtt_total = (uint32_t) (end - start);
70
71         return 0;
72 }
73
74 int
75 i915_gem_init_ioctl(struct drm_device *dev, void *data,
76                     struct drm_file *file_priv)
77 {
78         struct drm_i915_gem_init *args = data;
79         int ret;
80
81         mutex_lock(&dev->struct_mutex);
82         ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
83         mutex_unlock(&dev->struct_mutex);
84
85         return ret;
86 }
87
88 int
89 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
90                             struct drm_file *file_priv)
91 {
92         struct drm_i915_gem_get_aperture *args = data;
93
94         if (!(dev->driver->driver_features & DRIVER_GEM))
95                 return -ENODEV;
96
97         args->aper_size = dev->gtt_total;
98         args->aper_available_size = (args->aper_size -
99                                      atomic_read(&dev->pin_memory));
100
101         return 0;
102 }
103
104
105 /**
106  * Creates a new mm object and returns a handle to it.
107  */
108 int
109 i915_gem_create_ioctl(struct drm_device *dev, void *data,
110                       struct drm_file *file_priv)
111 {
112         struct drm_i915_gem_create *args = data;
113         struct drm_gem_object *obj;
114         int handle, ret;
115
116         args->size = roundup(args->size, PAGE_SIZE);
117
118         /* Allocate the new object */
119         obj = drm_gem_object_alloc(dev, args->size);
120         if (obj == NULL)
121                 return -ENOMEM;
122
123         ret = drm_gem_handle_create(file_priv, obj, &handle);
124         mutex_lock(&dev->struct_mutex);
125         drm_gem_object_handle_unreference(obj);
126         mutex_unlock(&dev->struct_mutex);
127
128         if (ret)
129                 return ret;
130
131         args->handle = handle;
132
133         return 0;
134 }
135
136 static inline int
137 fast_shmem_read(struct page **pages,
138                 loff_t page_base, int page_offset,
139                 char __user *data,
140                 int length)
141 {
142         char __iomem *vaddr;
143         int unwritten;
144
145         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
146         if (vaddr == NULL)
147                 return -ENOMEM;
148         unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
149         kunmap_atomic(vaddr, KM_USER0);
150
151         if (unwritten)
152                 return -EFAULT;
153
154         return 0;
155 }
156
157 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
158 {
159         drm_i915_private_t *dev_priv = obj->dev->dev_private;
160         struct drm_i915_gem_object *obj_priv = obj->driver_private;
161
162         return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
163                 obj_priv->tiling_mode != I915_TILING_NONE;
164 }
165
166 static inline int
167 slow_shmem_copy(struct page *dst_page,
168                 int dst_offset,
169                 struct page *src_page,
170                 int src_offset,
171                 int length)
172 {
173         char *dst_vaddr, *src_vaddr;
174
175         dst_vaddr = kmap_atomic(dst_page, KM_USER0);
176         if (dst_vaddr == NULL)
177                 return -ENOMEM;
178
179         src_vaddr = kmap_atomic(src_page, KM_USER1);
180         if (src_vaddr == NULL) {
181                 kunmap_atomic(dst_vaddr, KM_USER0);
182                 return -ENOMEM;
183         }
184
185         memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
186
187         kunmap_atomic(src_vaddr, KM_USER1);
188         kunmap_atomic(dst_vaddr, KM_USER0);
189
190         return 0;
191 }
192
193 static inline int
194 slow_shmem_bit17_copy(struct page *gpu_page,
195                       int gpu_offset,
196                       struct page *cpu_page,
197                       int cpu_offset,
198                       int length,
199                       int is_read)
200 {
201         char *gpu_vaddr, *cpu_vaddr;
202
203         /* Use the unswizzled path if this page isn't affected. */
204         if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
205                 if (is_read)
206                         return slow_shmem_copy(cpu_page, cpu_offset,
207                                                gpu_page, gpu_offset, length);
208                 else
209                         return slow_shmem_copy(gpu_page, gpu_offset,
210                                                cpu_page, cpu_offset, length);
211         }
212
213         gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
214         if (gpu_vaddr == NULL)
215                 return -ENOMEM;
216
217         cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
218         if (cpu_vaddr == NULL) {
219                 kunmap_atomic(gpu_vaddr, KM_USER0);
220                 return -ENOMEM;
221         }
222
223         /* Copy the data, XORing A6 with A17 (1). The user already knows he's
224          * XORing with the other bits (A9 for Y, A9 and A10 for X)
225          */
226         while (length > 0) {
227                 int cacheline_end = ALIGN(gpu_offset + 1, 64);
228                 int this_length = min(cacheline_end - gpu_offset, length);
229                 int swizzled_gpu_offset = gpu_offset ^ 64;
230
231                 if (is_read) {
232                         memcpy(cpu_vaddr + cpu_offset,
233                                gpu_vaddr + swizzled_gpu_offset,
234                                this_length);
235                 } else {
236                         memcpy(gpu_vaddr + swizzled_gpu_offset,
237                                cpu_vaddr + cpu_offset,
238                                this_length);
239                 }
240                 cpu_offset += this_length;
241                 gpu_offset += this_length;
242                 length -= this_length;
243         }
244
245         kunmap_atomic(cpu_vaddr, KM_USER1);
246         kunmap_atomic(gpu_vaddr, KM_USER0);
247
248         return 0;
249 }
250
251 /**
252  * This is the fast shmem pread path, which attempts to copy_from_user directly
253  * from the backing pages of the object to the user's address space.  On a
254  * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
255  */
256 static int
257 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
258                           struct drm_i915_gem_pread *args,
259                           struct drm_file *file_priv)
260 {
261         struct drm_i915_gem_object *obj_priv = obj->driver_private;
262         ssize_t remain;
263         loff_t offset, page_base;
264         char __user *user_data;
265         int page_offset, page_length;
266         int ret;
267
268         user_data = (char __user *) (uintptr_t) args->data_ptr;
269         remain = args->size;
270
271         mutex_lock(&dev->struct_mutex);
272
273         ret = i915_gem_object_get_pages(obj);
274         if (ret != 0)
275                 goto fail_unlock;
276
277         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
278                                                         args->size);
279         if (ret != 0)
280                 goto fail_put_pages;
281
282         obj_priv = obj->driver_private;
283         offset = args->offset;
284
285         while (remain > 0) {
286                 /* Operation in this page
287                  *
288                  * page_base = page offset within aperture
289                  * page_offset = offset within page
290                  * page_length = bytes to copy for this page
291                  */
292                 page_base = (offset & ~(PAGE_SIZE-1));
293                 page_offset = offset & (PAGE_SIZE-1);
294                 page_length = remain;
295                 if ((page_offset + remain) > PAGE_SIZE)
296                         page_length = PAGE_SIZE - page_offset;
297
298                 ret = fast_shmem_read(obj_priv->pages,
299                                       page_base, page_offset,
300                                       user_data, page_length);
301                 if (ret)
302                         goto fail_put_pages;
303
304                 remain -= page_length;
305                 user_data += page_length;
306                 offset += page_length;
307         }
308
309 fail_put_pages:
310         i915_gem_object_put_pages(obj);
311 fail_unlock:
312         mutex_unlock(&dev->struct_mutex);
313
314         return ret;
315 }
316
317 /**
318  * This is the fallback shmem pread path, which allocates temporary storage
319  * in kernel space to copy_to_user into outside of the struct_mutex, so we
320  * can copy out of the object's backing pages while holding the struct mutex
321  * and not take page faults.
322  */
323 static int
324 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
325                           struct drm_i915_gem_pread *args,
326                           struct drm_file *file_priv)
327 {
328         struct drm_i915_gem_object *obj_priv = obj->driver_private;
329         struct mm_struct *mm = current->mm;
330         struct page **user_pages;
331         ssize_t remain;
332         loff_t offset, pinned_pages, i;
333         loff_t first_data_page, last_data_page, num_pages;
334         int shmem_page_index, shmem_page_offset;
335         int data_page_index,  data_page_offset;
336         int page_length;
337         int ret;
338         uint64_t data_ptr = args->data_ptr;
339         int do_bit17_swizzling;
340
341         remain = args->size;
342
343         /* Pin the user pages containing the data.  We can't fault while
344          * holding the struct mutex, yet we want to hold it while
345          * dereferencing the user data.
346          */
347         first_data_page = data_ptr / PAGE_SIZE;
348         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
349         num_pages = last_data_page - first_data_page + 1;
350
351         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
352         if (user_pages == NULL)
353                 return -ENOMEM;
354
355         down_read(&mm->mmap_sem);
356         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
357                                       num_pages, 1, 0, user_pages, NULL);
358         up_read(&mm->mmap_sem);
359         if (pinned_pages < num_pages) {
360                 ret = -EFAULT;
361                 goto fail_put_user_pages;
362         }
363
364         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
365
366         mutex_lock(&dev->struct_mutex);
367
368         ret = i915_gem_object_get_pages(obj);
369         if (ret != 0)
370                 goto fail_unlock;
371
372         ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
373                                                         args->size);
374         if (ret != 0)
375                 goto fail_put_pages;
376
377         obj_priv = obj->driver_private;
378         offset = args->offset;
379
380         while (remain > 0) {
381                 /* Operation in this page
382                  *
383                  * shmem_page_index = page number within shmem file
384                  * shmem_page_offset = offset within page in shmem file
385                  * data_page_index = page number in get_user_pages return
386                  * data_page_offset = offset with data_page_index page.
387                  * page_length = bytes to copy for this page
388                  */
389                 shmem_page_index = offset / PAGE_SIZE;
390                 shmem_page_offset = offset & ~PAGE_MASK;
391                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
392                 data_page_offset = data_ptr & ~PAGE_MASK;
393
394                 page_length = remain;
395                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
396                         page_length = PAGE_SIZE - shmem_page_offset;
397                 if ((data_page_offset + page_length) > PAGE_SIZE)
398                         page_length = PAGE_SIZE - data_page_offset;
399
400                 if (do_bit17_swizzling) {
401                         ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
402                                                     shmem_page_offset,
403                                                     user_pages[data_page_index],
404                                                     data_page_offset,
405                                                     page_length,
406                                                     1);
407                 } else {
408                         ret = slow_shmem_copy(user_pages[data_page_index],
409                                               data_page_offset,
410                                               obj_priv->pages[shmem_page_index],
411                                               shmem_page_offset,
412                                               page_length);
413                 }
414                 if (ret)
415                         goto fail_put_pages;
416
417                 remain -= page_length;
418                 data_ptr += page_length;
419                 offset += page_length;
420         }
421
422 fail_put_pages:
423         i915_gem_object_put_pages(obj);
424 fail_unlock:
425         mutex_unlock(&dev->struct_mutex);
426 fail_put_user_pages:
427         for (i = 0; i < pinned_pages; i++) {
428                 SetPageDirty(user_pages[i]);
429                 page_cache_release(user_pages[i]);
430         }
431         drm_free_large(user_pages);
432
433         return ret;
434 }
435
436 /**
437  * Reads data from the object referenced by handle.
438  *
439  * On error, the contents of *data are undefined.
440  */
441 int
442 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
443                      struct drm_file *file_priv)
444 {
445         struct drm_i915_gem_pread *args = data;
446         struct drm_gem_object *obj;
447         struct drm_i915_gem_object *obj_priv;
448         int ret;
449
450         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
451         if (obj == NULL)
452                 return -EBADF;
453         obj_priv = obj->driver_private;
454
455         /* Bounds check source.
456          *
457          * XXX: This could use review for overflow issues...
458          */
459         if (args->offset > obj->size || args->size > obj->size ||
460             args->offset + args->size > obj->size) {
461                 drm_gem_object_unreference(obj);
462                 return -EINVAL;
463         }
464
465         if (i915_gem_object_needs_bit17_swizzle(obj)) {
466                 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
467         } else {
468                 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
469                 if (ret != 0)
470                         ret = i915_gem_shmem_pread_slow(dev, obj, args,
471                                                         file_priv);
472         }
473
474         drm_gem_object_unreference(obj);
475
476         return ret;
477 }
478
479 /* This is the fast write path which cannot handle
480  * page faults in the source data
481  */
482
483 static inline int
484 fast_user_write(struct io_mapping *mapping,
485                 loff_t page_base, int page_offset,
486                 char __user *user_data,
487                 int length)
488 {
489         char *vaddr_atomic;
490         unsigned long unwritten;
491
492         vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
493         unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
494                                                       user_data, length);
495         io_mapping_unmap_atomic(vaddr_atomic);
496         if (unwritten)
497                 return -EFAULT;
498         return 0;
499 }
500
501 /* Here's the write path which can sleep for
502  * page faults
503  */
504
505 static inline int
506 slow_kernel_write(struct io_mapping *mapping,
507                   loff_t gtt_base, int gtt_offset,
508                   struct page *user_page, int user_offset,
509                   int length)
510 {
511         char *src_vaddr, *dst_vaddr;
512         unsigned long unwritten;
513
514         dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
515         src_vaddr = kmap_atomic(user_page, KM_USER1);
516         unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
517                                                       src_vaddr + user_offset,
518                                                       length);
519         kunmap_atomic(src_vaddr, KM_USER1);
520         io_mapping_unmap_atomic(dst_vaddr);
521         if (unwritten)
522                 return -EFAULT;
523         return 0;
524 }
525
526 static inline int
527 fast_shmem_write(struct page **pages,
528                  loff_t page_base, int page_offset,
529                  char __user *data,
530                  int length)
531 {
532         char __iomem *vaddr;
533         unsigned long unwritten;
534
535         vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
536         if (vaddr == NULL)
537                 return -ENOMEM;
538         unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
539         kunmap_atomic(vaddr, KM_USER0);
540
541         if (unwritten)
542                 return -EFAULT;
543         return 0;
544 }
545
546 /**
547  * This is the fast pwrite path, where we copy the data directly from the
548  * user into the GTT, uncached.
549  */
550 static int
551 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
552                          struct drm_i915_gem_pwrite *args,
553                          struct drm_file *file_priv)
554 {
555         struct drm_i915_gem_object *obj_priv = obj->driver_private;
556         drm_i915_private_t *dev_priv = dev->dev_private;
557         ssize_t remain;
558         loff_t offset, page_base;
559         char __user *user_data;
560         int page_offset, page_length;
561         int ret;
562
563         user_data = (char __user *) (uintptr_t) args->data_ptr;
564         remain = args->size;
565         if (!access_ok(VERIFY_READ, user_data, remain))
566                 return -EFAULT;
567
568
569         mutex_lock(&dev->struct_mutex);
570         ret = i915_gem_object_pin(obj, 0);
571         if (ret) {
572                 mutex_unlock(&dev->struct_mutex);
573                 return ret;
574         }
575         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
576         if (ret)
577                 goto fail;
578
579         obj_priv = obj->driver_private;
580         offset = obj_priv->gtt_offset + args->offset;
581
582         while (remain > 0) {
583                 /* Operation in this page
584                  *
585                  * page_base = page offset within aperture
586                  * page_offset = offset within page
587                  * page_length = bytes to copy for this page
588                  */
589                 page_base = (offset & ~(PAGE_SIZE-1));
590                 page_offset = offset & (PAGE_SIZE-1);
591                 page_length = remain;
592                 if ((page_offset + remain) > PAGE_SIZE)
593                         page_length = PAGE_SIZE - page_offset;
594
595                 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
596                                        page_offset, user_data, page_length);
597
598                 /* If we get a fault while copying data, then (presumably) our
599                  * source page isn't available.  Return the error and we'll
600                  * retry in the slow path.
601                  */
602                 if (ret)
603                         goto fail;
604
605                 remain -= page_length;
606                 user_data += page_length;
607                 offset += page_length;
608         }
609
610 fail:
611         i915_gem_object_unpin(obj);
612         mutex_unlock(&dev->struct_mutex);
613
614         return ret;
615 }
616
617 /**
618  * This is the fallback GTT pwrite path, which uses get_user_pages to pin
619  * the memory and maps it using kmap_atomic for copying.
620  *
621  * This code resulted in x11perf -rgb10text consuming about 10% more CPU
622  * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
623  */
624 static int
625 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
626                          struct drm_i915_gem_pwrite *args,
627                          struct drm_file *file_priv)
628 {
629         struct drm_i915_gem_object *obj_priv = obj->driver_private;
630         drm_i915_private_t *dev_priv = dev->dev_private;
631         ssize_t remain;
632         loff_t gtt_page_base, offset;
633         loff_t first_data_page, last_data_page, num_pages;
634         loff_t pinned_pages, i;
635         struct page **user_pages;
636         struct mm_struct *mm = current->mm;
637         int gtt_page_offset, data_page_offset, data_page_index, page_length;
638         int ret;
639         uint64_t data_ptr = args->data_ptr;
640
641         remain = args->size;
642
643         /* Pin the user pages containing the data.  We can't fault while
644          * holding the struct mutex, and all of the pwrite implementations
645          * want to hold it while dereferencing the user data.
646          */
647         first_data_page = data_ptr / PAGE_SIZE;
648         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
649         num_pages = last_data_page - first_data_page + 1;
650
651         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
652         if (user_pages == NULL)
653                 return -ENOMEM;
654
655         down_read(&mm->mmap_sem);
656         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
657                                       num_pages, 0, 0, user_pages, NULL);
658         up_read(&mm->mmap_sem);
659         if (pinned_pages < num_pages) {
660                 ret = -EFAULT;
661                 goto out_unpin_pages;
662         }
663
664         mutex_lock(&dev->struct_mutex);
665         ret = i915_gem_object_pin(obj, 0);
666         if (ret)
667                 goto out_unlock;
668
669         ret = i915_gem_object_set_to_gtt_domain(obj, 1);
670         if (ret)
671                 goto out_unpin_object;
672
673         obj_priv = obj->driver_private;
674         offset = obj_priv->gtt_offset + args->offset;
675
676         while (remain > 0) {
677                 /* Operation in this page
678                  *
679                  * gtt_page_base = page offset within aperture
680                  * gtt_page_offset = offset within page in aperture
681                  * data_page_index = page number in get_user_pages return
682                  * data_page_offset = offset with data_page_index page.
683                  * page_length = bytes to copy for this page
684                  */
685                 gtt_page_base = offset & PAGE_MASK;
686                 gtt_page_offset = offset & ~PAGE_MASK;
687                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
688                 data_page_offset = data_ptr & ~PAGE_MASK;
689
690                 page_length = remain;
691                 if ((gtt_page_offset + page_length) > PAGE_SIZE)
692                         page_length = PAGE_SIZE - gtt_page_offset;
693                 if ((data_page_offset + page_length) > PAGE_SIZE)
694                         page_length = PAGE_SIZE - data_page_offset;
695
696                 ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
697                                         gtt_page_base, gtt_page_offset,
698                                         user_pages[data_page_index],
699                                         data_page_offset,
700                                         page_length);
701
702                 /* If we get a fault while copying data, then (presumably) our
703                  * source page isn't available.  Return the error and we'll
704                  * retry in the slow path.
705                  */
706                 if (ret)
707                         goto out_unpin_object;
708
709                 remain -= page_length;
710                 offset += page_length;
711                 data_ptr += page_length;
712         }
713
714 out_unpin_object:
715         i915_gem_object_unpin(obj);
716 out_unlock:
717         mutex_unlock(&dev->struct_mutex);
718 out_unpin_pages:
719         for (i = 0; i < pinned_pages; i++)
720                 page_cache_release(user_pages[i]);
721         drm_free_large(user_pages);
722
723         return ret;
724 }
725
726 /**
727  * This is the fast shmem pwrite path, which attempts to directly
728  * copy_from_user into the kmapped pages backing the object.
729  */
730 static int
731 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
732                            struct drm_i915_gem_pwrite *args,
733                            struct drm_file *file_priv)
734 {
735         struct drm_i915_gem_object *obj_priv = obj->driver_private;
736         ssize_t remain;
737         loff_t offset, page_base;
738         char __user *user_data;
739         int page_offset, page_length;
740         int ret;
741
742         user_data = (char __user *) (uintptr_t) args->data_ptr;
743         remain = args->size;
744
745         mutex_lock(&dev->struct_mutex);
746
747         ret = i915_gem_object_get_pages(obj);
748         if (ret != 0)
749                 goto fail_unlock;
750
751         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
752         if (ret != 0)
753                 goto fail_put_pages;
754
755         obj_priv = obj->driver_private;
756         offset = args->offset;
757         obj_priv->dirty = 1;
758
759         while (remain > 0) {
760                 /* Operation in this page
761                  *
762                  * page_base = page offset within aperture
763                  * page_offset = offset within page
764                  * page_length = bytes to copy for this page
765                  */
766                 page_base = (offset & ~(PAGE_SIZE-1));
767                 page_offset = offset & (PAGE_SIZE-1);
768                 page_length = remain;
769                 if ((page_offset + remain) > PAGE_SIZE)
770                         page_length = PAGE_SIZE - page_offset;
771
772                 ret = fast_shmem_write(obj_priv->pages,
773                                        page_base, page_offset,
774                                        user_data, page_length);
775                 if (ret)
776                         goto fail_put_pages;
777
778                 remain -= page_length;
779                 user_data += page_length;
780                 offset += page_length;
781         }
782
783 fail_put_pages:
784         i915_gem_object_put_pages(obj);
785 fail_unlock:
786         mutex_unlock(&dev->struct_mutex);
787
788         return ret;
789 }
790
791 /**
792  * This is the fallback shmem pwrite path, which uses get_user_pages to pin
793  * the memory and maps it using kmap_atomic for copying.
794  *
795  * This avoids taking mmap_sem for faulting on the user's address while the
796  * struct_mutex is held.
797  */
798 static int
799 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
800                            struct drm_i915_gem_pwrite *args,
801                            struct drm_file *file_priv)
802 {
803         struct drm_i915_gem_object *obj_priv = obj->driver_private;
804         struct mm_struct *mm = current->mm;
805         struct page **user_pages;
806         ssize_t remain;
807         loff_t offset, pinned_pages, i;
808         loff_t first_data_page, last_data_page, num_pages;
809         int shmem_page_index, shmem_page_offset;
810         int data_page_index,  data_page_offset;
811         int page_length;
812         int ret;
813         uint64_t data_ptr = args->data_ptr;
814         int do_bit17_swizzling;
815
816         remain = args->size;
817
818         /* Pin the user pages containing the data.  We can't fault while
819          * holding the struct mutex, and all of the pwrite implementations
820          * want to hold it while dereferencing the user data.
821          */
822         first_data_page = data_ptr / PAGE_SIZE;
823         last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
824         num_pages = last_data_page - first_data_page + 1;
825
826         user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
827         if (user_pages == NULL)
828                 return -ENOMEM;
829
830         down_read(&mm->mmap_sem);
831         pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
832                                       num_pages, 0, 0, user_pages, NULL);
833         up_read(&mm->mmap_sem);
834         if (pinned_pages < num_pages) {
835                 ret = -EFAULT;
836                 goto fail_put_user_pages;
837         }
838
839         do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
840
841         mutex_lock(&dev->struct_mutex);
842
843         ret = i915_gem_object_get_pages(obj);
844         if (ret != 0)
845                 goto fail_unlock;
846
847         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
848         if (ret != 0)
849                 goto fail_put_pages;
850
851         obj_priv = obj->driver_private;
852         offset = args->offset;
853         obj_priv->dirty = 1;
854
855         while (remain > 0) {
856                 /* Operation in this page
857                  *
858                  * shmem_page_index = page number within shmem file
859                  * shmem_page_offset = offset within page in shmem file
860                  * data_page_index = page number in get_user_pages return
861                  * data_page_offset = offset with data_page_index page.
862                  * page_length = bytes to copy for this page
863                  */
864                 shmem_page_index = offset / PAGE_SIZE;
865                 shmem_page_offset = offset & ~PAGE_MASK;
866                 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
867                 data_page_offset = data_ptr & ~PAGE_MASK;
868
869                 page_length = remain;
870                 if ((shmem_page_offset + page_length) > PAGE_SIZE)
871                         page_length = PAGE_SIZE - shmem_page_offset;
872                 if ((data_page_offset + page_length) > PAGE_SIZE)
873                         page_length = PAGE_SIZE - data_page_offset;
874
875                 if (do_bit17_swizzling) {
876                         ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
877                                                     shmem_page_offset,
878                                                     user_pages[data_page_index],
879                                                     data_page_offset,
880                                                     page_length,
881                                                     0);
882                 } else {
883                         ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
884                                               shmem_page_offset,
885                                               user_pages[data_page_index],
886                                               data_page_offset,
887                                               page_length);
888                 }
889                 if (ret)
890                         goto fail_put_pages;
891
892                 remain -= page_length;
893                 data_ptr += page_length;
894                 offset += page_length;
895         }
896
897 fail_put_pages:
898         i915_gem_object_put_pages(obj);
899 fail_unlock:
900         mutex_unlock(&dev->struct_mutex);
901 fail_put_user_pages:
902         for (i = 0; i < pinned_pages; i++)
903                 page_cache_release(user_pages[i]);
904         drm_free_large(user_pages);
905
906         return ret;
907 }
908
909 /**
910  * Writes data to the object referenced by handle.
911  *
912  * On error, the contents of the buffer that were to be modified are undefined.
913  */
914 int
915 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
916                       struct drm_file *file_priv)
917 {
918         struct drm_i915_gem_pwrite *args = data;
919         struct drm_gem_object *obj;
920         struct drm_i915_gem_object *obj_priv;
921         int ret = 0;
922
923         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
924         if (obj == NULL)
925                 return -EBADF;
926         obj_priv = obj->driver_private;
927
928         /* Bounds check destination.
929          *
930          * XXX: This could use review for overflow issues...
931          */
932         if (args->offset > obj->size || args->size > obj->size ||
933             args->offset + args->size > obj->size) {
934                 drm_gem_object_unreference(obj);
935                 return -EINVAL;
936         }
937
938         /* We can only do the GTT pwrite on untiled buffers, as otherwise
939          * it would end up going through the fenced access, and we'll get
940          * different detiling behavior between reading and writing.
941          * pread/pwrite currently are reading and writing from the CPU
942          * perspective, requiring manual detiling by the client.
943          */
944         if (obj_priv->phys_obj)
945                 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
946         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
947                  dev->gtt_total != 0) {
948                 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
949                 if (ret == -EFAULT) {
950                         ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
951                                                        file_priv);
952                 }
953         } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
954                 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
955         } else {
956                 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
957                 if (ret == -EFAULT) {
958                         ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
959                                                          file_priv);
960                 }
961         }
962
963 #if WATCH_PWRITE
964         if (ret)
965                 DRM_INFO("pwrite failed %d\n", ret);
966 #endif
967
968         drm_gem_object_unreference(obj);
969
970         return ret;
971 }
972
973 /**
974  * Called when user space prepares to use an object with the CPU, either
975  * through the mmap ioctl's mapping or a GTT mapping.
976  */
977 int
978 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
979                           struct drm_file *file_priv)
980 {
981         struct drm_i915_gem_set_domain *args = data;
982         struct drm_gem_object *obj;
983         uint32_t read_domains = args->read_domains;
984         uint32_t write_domain = args->write_domain;
985         int ret;
986
987         if (!(dev->driver->driver_features & DRIVER_GEM))
988                 return -ENODEV;
989
990         /* Only handle setting domains to types used by the CPU. */
991         if (write_domain & I915_GEM_GPU_DOMAINS)
992                 return -EINVAL;
993
994         if (read_domains & I915_GEM_GPU_DOMAINS)
995                 return -EINVAL;
996
997         /* Having something in the write domain implies it's in the read
998          * domain, and only that read domain.  Enforce that in the request.
999          */
1000         if (write_domain != 0 && read_domains != write_domain)
1001                 return -EINVAL;
1002
1003         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1004         if (obj == NULL)
1005                 return -EBADF;
1006
1007         mutex_lock(&dev->struct_mutex);
1008 #if WATCH_BUF
1009         DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1010                  obj, obj->size, read_domains, write_domain);
1011 #endif
1012         if (read_domains & I915_GEM_DOMAIN_GTT) {
1013                 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1014
1015                 /* Silently promote "you're not bound, there was nothing to do"
1016                  * to success, since the client was just asking us to
1017                  * make sure everything was done.
1018                  */
1019                 if (ret == -EINVAL)
1020                         ret = 0;
1021         } else {
1022                 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1023         }
1024
1025         drm_gem_object_unreference(obj);
1026         mutex_unlock(&dev->struct_mutex);
1027         return ret;
1028 }
1029
1030 /**
1031  * Called when user space has done writes to this buffer
1032  */
1033 int
1034 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1035                       struct drm_file *file_priv)
1036 {
1037         struct drm_i915_gem_sw_finish *args = data;
1038         struct drm_gem_object *obj;
1039         struct drm_i915_gem_object *obj_priv;
1040         int ret = 0;
1041
1042         if (!(dev->driver->driver_features & DRIVER_GEM))
1043                 return -ENODEV;
1044
1045         mutex_lock(&dev->struct_mutex);
1046         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1047         if (obj == NULL) {
1048                 mutex_unlock(&dev->struct_mutex);
1049                 return -EBADF;
1050         }
1051
1052 #if WATCH_BUF
1053         DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1054                  __func__, args->handle, obj, obj->size);
1055 #endif
1056         obj_priv = obj->driver_private;
1057
1058         /* Pinned buffers may be scanout, so flush the cache */
1059         if (obj_priv->pin_count)
1060                 i915_gem_object_flush_cpu_write_domain(obj);
1061
1062         drm_gem_object_unreference(obj);
1063         mutex_unlock(&dev->struct_mutex);
1064         return ret;
1065 }
1066
1067 /**
1068  * Maps the contents of an object, returning the address it is mapped
1069  * into.
1070  *
1071  * While the mapping holds a reference on the contents of the object, it doesn't
1072  * imply a ref on the object itself.
1073  */
1074 int
1075 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1076                    struct drm_file *file_priv)
1077 {
1078         struct drm_i915_gem_mmap *args = data;
1079         struct drm_gem_object *obj;
1080         loff_t offset;
1081         unsigned long addr;
1082
1083         if (!(dev->driver->driver_features & DRIVER_GEM))
1084                 return -ENODEV;
1085
1086         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1087         if (obj == NULL)
1088                 return -EBADF;
1089
1090         offset = args->offset;
1091
1092         down_write(&current->mm->mmap_sem);
1093         addr = do_mmap(obj->filp, 0, args->size,
1094                        PROT_READ | PROT_WRITE, MAP_SHARED,
1095                        args->offset);
1096         up_write(&current->mm->mmap_sem);
1097         mutex_lock(&dev->struct_mutex);
1098         drm_gem_object_unreference(obj);
1099         mutex_unlock(&dev->struct_mutex);
1100         if (IS_ERR((void *)addr))
1101                 return addr;
1102
1103         args->addr_ptr = (uint64_t) addr;
1104
1105         return 0;
1106 }
1107
1108 /**
1109  * i915_gem_fault - fault a page into the GTT
1110  * vma: VMA in question
1111  * vmf: fault info
1112  *
1113  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1114  * from userspace.  The fault handler takes care of binding the object to
1115  * the GTT (if needed), allocating and programming a fence register (again,
1116  * only if needed based on whether the old reg is still valid or the object
1117  * is tiled) and inserting a new PTE into the faulting process.
1118  *
1119  * Note that the faulting process may involve evicting existing objects
1120  * from the GTT and/or fence registers to make room.  So performance may
1121  * suffer if the GTT working set is large or there are few fence registers
1122  * left.
1123  */
1124 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1125 {
1126         struct drm_gem_object *obj = vma->vm_private_data;
1127         struct drm_device *dev = obj->dev;
1128         struct drm_i915_private *dev_priv = dev->dev_private;
1129         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1130         pgoff_t page_offset;
1131         unsigned long pfn;
1132         int ret = 0;
1133         bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1134
1135         /* We don't use vmf->pgoff since that has the fake offset */
1136         page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1137                 PAGE_SHIFT;
1138
1139         /* Now bind it into the GTT if needed */
1140         mutex_lock(&dev->struct_mutex);
1141         if (!obj_priv->gtt_space) {
1142                 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
1143                 if (ret) {
1144                         mutex_unlock(&dev->struct_mutex);
1145                         return VM_FAULT_SIGBUS;
1146                 }
1147
1148                 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1149                 if (ret) {
1150                         mutex_unlock(&dev->struct_mutex);
1151                         return VM_FAULT_SIGBUS;
1152                 }
1153
1154                 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1155         }
1156
1157         /* Need a new fence register? */
1158         if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
1159             obj_priv->tiling_mode != I915_TILING_NONE) {
1160                 ret = i915_gem_object_get_fence_reg(obj);
1161                 if (ret) {
1162                         mutex_unlock(&dev->struct_mutex);
1163                         return VM_FAULT_SIGBUS;
1164                 }
1165         }
1166
1167         pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1168                 page_offset;
1169
1170         /* Finally, remap it using the new GTT offset */
1171         ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1172
1173         mutex_unlock(&dev->struct_mutex);
1174
1175         switch (ret) {
1176         case -ENOMEM:
1177         case -EAGAIN:
1178                 return VM_FAULT_OOM;
1179         case -EFAULT:
1180         case -EINVAL:
1181                 return VM_FAULT_SIGBUS;
1182         default:
1183                 return VM_FAULT_NOPAGE;
1184         }
1185 }
1186
1187 /**
1188  * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1189  * @obj: obj in question
1190  *
1191  * GEM memory mapping works by handing back to userspace a fake mmap offset
1192  * it can use in a subsequent mmap(2) call.  The DRM core code then looks
1193  * up the object based on the offset and sets up the various memory mapping
1194  * structures.
1195  *
1196  * This routine allocates and attaches a fake offset for @obj.
1197  */
1198 static int
1199 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1200 {
1201         struct drm_device *dev = obj->dev;
1202         struct drm_gem_mm *mm = dev->mm_private;
1203         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1204         struct drm_map_list *list;
1205         struct drm_local_map *map;
1206         int ret = 0;
1207
1208         /* Set the object up for mmap'ing */
1209         list = &obj->map_list;
1210         list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1211         if (!list->map)
1212                 return -ENOMEM;
1213
1214         map = list->map;
1215         map->type = _DRM_GEM;
1216         map->size = obj->size;
1217         map->handle = obj;
1218
1219         /* Get a DRM GEM mmap offset allocated... */
1220         list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1221                                                     obj->size / PAGE_SIZE, 0, 0);
1222         if (!list->file_offset_node) {
1223                 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1224                 ret = -ENOMEM;
1225                 goto out_free_list;
1226         }
1227
1228         list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1229                                                   obj->size / PAGE_SIZE, 0);
1230         if (!list->file_offset_node) {
1231                 ret = -ENOMEM;
1232                 goto out_free_list;
1233         }
1234
1235         list->hash.key = list->file_offset_node->start;
1236         if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1237                 DRM_ERROR("failed to add to map hash\n");
1238                 goto out_free_mm;
1239         }
1240
1241         /* By now we should be all set, any drm_mmap request on the offset
1242          * below will get to our mmap & fault handler */
1243         obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1244
1245         return 0;
1246
1247 out_free_mm:
1248         drm_mm_put_block(list->file_offset_node);
1249 out_free_list:
1250         kfree(list->map);
1251
1252         return ret;
1253 }
1254
1255 /**
1256  * i915_gem_release_mmap - remove physical page mappings
1257  * @obj: obj in question
1258  *
1259  * Preserve the reservation of the mmaping with the DRM core code, but
1260  * relinquish ownership of the pages back to the system.
1261  *
1262  * It is vital that we remove the page mapping if we have mapped a tiled
1263  * object through the GTT and then lose the fence register due to
1264  * resource pressure. Similarly if the object has been moved out of the
1265  * aperture, than pages mapped into userspace must be revoked. Removing the
1266  * mapping will then trigger a page fault on the next user access, allowing
1267  * fixup by i915_gem_fault().
1268  */
1269 void
1270 i915_gem_release_mmap(struct drm_gem_object *obj)
1271 {
1272         struct drm_device *dev = obj->dev;
1273         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1274
1275         if (dev->dev_mapping)
1276                 unmap_mapping_range(dev->dev_mapping,
1277                                     obj_priv->mmap_offset, obj->size, 1);
1278 }
1279
1280 static void
1281 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1282 {
1283         struct drm_device *dev = obj->dev;
1284         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1285         struct drm_gem_mm *mm = dev->mm_private;
1286         struct drm_map_list *list;
1287
1288         list = &obj->map_list;
1289         drm_ht_remove_item(&mm->offset_hash, &list->hash);
1290
1291         if (list->file_offset_node) {
1292                 drm_mm_put_block(list->file_offset_node);
1293                 list->file_offset_node = NULL;
1294         }
1295
1296         if (list->map) {
1297                 kfree(list->map);
1298                 list->map = NULL;
1299         }
1300
1301         obj_priv->mmap_offset = 0;
1302 }
1303
1304 /**
1305  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1306  * @obj: object to check
1307  *
1308  * Return the required GTT alignment for an object, taking into account
1309  * potential fence register mapping if needed.
1310  */
1311 static uint32_t
1312 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1313 {
1314         struct drm_device *dev = obj->dev;
1315         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1316         int start, i;
1317
1318         /*
1319          * Minimum alignment is 4k (GTT page size), but might be greater
1320          * if a fence register is needed for the object.
1321          */
1322         if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1323                 return 4096;
1324
1325         /*
1326          * Previous chips need to be aligned to the size of the smallest
1327          * fence register that can contain the object.
1328          */
1329         if (IS_I9XX(dev))
1330                 start = 1024*1024;
1331         else
1332                 start = 512*1024;
1333
1334         for (i = start; i < obj->size; i <<= 1)
1335                 ;
1336
1337         return i;
1338 }
1339
1340 /**
1341  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1342  * @dev: DRM device
1343  * @data: GTT mapping ioctl data
1344  * @file_priv: GEM object info
1345  *
1346  * Simply returns the fake offset to userspace so it can mmap it.
1347  * The mmap call will end up in drm_gem_mmap(), which will set things
1348  * up so we can get faults in the handler above.
1349  *
1350  * The fault handler will take care of binding the object into the GTT
1351  * (since it may have been evicted to make room for something), allocating
1352  * a fence register, and mapping the appropriate aperture address into
1353  * userspace.
1354  */
1355 int
1356 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1357                         struct drm_file *file_priv)
1358 {
1359         struct drm_i915_gem_mmap_gtt *args = data;
1360         struct drm_i915_private *dev_priv = dev->dev_private;
1361         struct drm_gem_object *obj;
1362         struct drm_i915_gem_object *obj_priv;
1363         int ret;
1364
1365         if (!(dev->driver->driver_features & DRIVER_GEM))
1366                 return -ENODEV;
1367
1368         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1369         if (obj == NULL)
1370                 return -EBADF;
1371
1372         mutex_lock(&dev->struct_mutex);
1373
1374         obj_priv = obj->driver_private;
1375
1376         if (!obj_priv->mmap_offset) {
1377                 ret = i915_gem_create_mmap_offset(obj);
1378                 if (ret) {
1379                         drm_gem_object_unreference(obj);
1380                         mutex_unlock(&dev->struct_mutex);
1381                         return ret;
1382                 }
1383         }
1384
1385         args->offset = obj_priv->mmap_offset;
1386
1387         obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
1388
1389         /* Make sure the alignment is correct for fence regs etc */
1390         if (obj_priv->agp_mem &&
1391             (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
1392                 drm_gem_object_unreference(obj);
1393                 mutex_unlock(&dev->struct_mutex);
1394                 return -EINVAL;
1395         }
1396
1397         /*
1398          * Pull it into the GTT so that we have a page list (makes the
1399          * initial fault faster and any subsequent flushing possible).
1400          */
1401         if (!obj_priv->agp_mem) {
1402                 ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
1403                 if (ret) {
1404                         drm_gem_object_unreference(obj);
1405                         mutex_unlock(&dev->struct_mutex);
1406                         return ret;
1407                 }
1408                 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1409         }
1410
1411         drm_gem_object_unreference(obj);
1412         mutex_unlock(&dev->struct_mutex);
1413
1414         return 0;
1415 }
1416
1417 void
1418 i915_gem_object_put_pages(struct drm_gem_object *obj)
1419 {
1420         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1421         int page_count = obj->size / PAGE_SIZE;
1422         int i;
1423
1424         BUG_ON(obj_priv->pages_refcount == 0);
1425
1426         if (--obj_priv->pages_refcount != 0)
1427                 return;
1428
1429         if (obj_priv->tiling_mode != I915_TILING_NONE)
1430                 i915_gem_object_save_bit_17_swizzle(obj);
1431
1432         for (i = 0; i < page_count; i++)
1433                 if (obj_priv->pages[i] != NULL) {
1434                         if (obj_priv->dirty)
1435                                 set_page_dirty(obj_priv->pages[i]);
1436                         mark_page_accessed(obj_priv->pages[i]);
1437                         page_cache_release(obj_priv->pages[i]);
1438                 }
1439         obj_priv->dirty = 0;
1440
1441         drm_free_large(obj_priv->pages);
1442         obj_priv->pages = NULL;
1443 }
1444
1445 static void
1446 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
1447 {
1448         struct drm_device *dev = obj->dev;
1449         drm_i915_private_t *dev_priv = dev->dev_private;
1450         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1451
1452         /* Add a reference if we're newly entering the active list. */
1453         if (!obj_priv->active) {
1454                 drm_gem_object_reference(obj);
1455                 obj_priv->active = 1;
1456         }
1457         /* Move from whatever list we were on to the tail of execution. */
1458         spin_lock(&dev_priv->mm.active_list_lock);
1459         list_move_tail(&obj_priv->list,
1460                        &dev_priv->mm.active_list);
1461         spin_unlock(&dev_priv->mm.active_list_lock);
1462         obj_priv->last_rendering_seqno = seqno;
1463 }
1464
1465 static void
1466 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1467 {
1468         struct drm_device *dev = obj->dev;
1469         drm_i915_private_t *dev_priv = dev->dev_private;
1470         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1471
1472         BUG_ON(!obj_priv->active);
1473         list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1474         obj_priv->last_rendering_seqno = 0;
1475 }
1476
1477 static void
1478 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1479 {
1480         struct drm_device *dev = obj->dev;
1481         drm_i915_private_t *dev_priv = dev->dev_private;
1482         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1483
1484         i915_verify_inactive(dev, __FILE__, __LINE__);
1485         if (obj_priv->pin_count != 0)
1486                 list_del_init(&obj_priv->list);
1487         else
1488                 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1489
1490         obj_priv->last_rendering_seqno = 0;
1491         if (obj_priv->active) {
1492                 obj_priv->active = 0;
1493                 drm_gem_object_unreference(obj);
1494         }
1495         i915_verify_inactive(dev, __FILE__, __LINE__);
1496 }
1497
1498 /**
1499  * Creates a new sequence number, emitting a write of it to the status page
1500  * plus an interrupt, which will trigger i915_user_interrupt_handler.
1501  *
1502  * Must be called with struct_lock held.
1503  *
1504  * Returned sequence numbers are nonzero on success.
1505  */
1506 static uint32_t
1507 i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1508                  uint32_t flush_domains)
1509 {
1510         drm_i915_private_t *dev_priv = dev->dev_private;
1511         struct drm_i915_file_private *i915_file_priv = NULL;
1512         struct drm_i915_gem_request *request;
1513         uint32_t seqno;
1514         int was_empty;
1515         RING_LOCALS;
1516
1517         if (file_priv != NULL)
1518                 i915_file_priv = file_priv->driver_priv;
1519
1520         request = kzalloc(sizeof(*request), GFP_KERNEL);
1521         if (request == NULL)
1522                 return 0;
1523
1524         /* Grab the seqno we're going to make this request be, and bump the
1525          * next (skipping 0 so it can be the reserved no-seqno value).
1526          */
1527         seqno = dev_priv->mm.next_gem_seqno;
1528         dev_priv->mm.next_gem_seqno++;
1529         if (dev_priv->mm.next_gem_seqno == 0)
1530                 dev_priv->mm.next_gem_seqno++;
1531
1532         BEGIN_LP_RING(4);
1533         OUT_RING(MI_STORE_DWORD_INDEX);
1534         OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1535         OUT_RING(seqno);
1536
1537         OUT_RING(MI_USER_INTERRUPT);
1538         ADVANCE_LP_RING();
1539
1540         DRM_DEBUG("%d\n", seqno);
1541
1542         request->seqno = seqno;
1543         request->emitted_jiffies = jiffies;
1544         was_empty = list_empty(&dev_priv->mm.request_list);
1545         list_add_tail(&request->list, &dev_priv->mm.request_list);
1546         if (i915_file_priv) {
1547                 list_add_tail(&request->client_list,
1548                               &i915_file_priv->mm.request_list);
1549         } else {
1550                 INIT_LIST_HEAD(&request->client_list);
1551         }
1552
1553         /* Associate any objects on the flushing list matching the write
1554          * domain we're flushing with our flush.
1555          */
1556         if (flush_domains != 0) {
1557                 struct drm_i915_gem_object *obj_priv, *next;
1558
1559                 list_for_each_entry_safe(obj_priv, next,
1560                                          &dev_priv->mm.flushing_list, list) {
1561                         struct drm_gem_object *obj = obj_priv->obj;
1562
1563                         if ((obj->write_domain & flush_domains) ==
1564                             obj->write_domain) {
1565                                 obj->write_domain = 0;
1566                                 i915_gem_object_move_to_active(obj, seqno);
1567                         }
1568                 }
1569
1570         }
1571
1572         if (was_empty && !dev_priv->mm.suspended)
1573                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1574         return seqno;
1575 }
1576
1577 /**
1578  * Command execution barrier
1579  *
1580  * Ensures that all commands in the ring are finished
1581  * before signalling the CPU
1582  */
1583 static uint32_t
1584 i915_retire_commands(struct drm_device *dev)
1585 {
1586         drm_i915_private_t *dev_priv = dev->dev_private;
1587         uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1588         uint32_t flush_domains = 0;
1589         RING_LOCALS;
1590
1591         /* The sampler always gets flushed on i965 (sigh) */
1592         if (IS_I965G(dev))
1593                 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1594         BEGIN_LP_RING(2);
1595         OUT_RING(cmd);
1596         OUT_RING(0); /* noop */
1597         ADVANCE_LP_RING();
1598         return flush_domains;
1599 }
1600
1601 /**
1602  * Moves buffers associated only with the given active seqno from the active
1603  * to inactive list, potentially freeing them.
1604  */
1605 static void
1606 i915_gem_retire_request(struct drm_device *dev,
1607                         struct drm_i915_gem_request *request)
1608 {
1609         drm_i915_private_t *dev_priv = dev->dev_private;
1610
1611         /* Move any buffers on the active list that are no longer referenced
1612          * by the ringbuffer to the flushing/inactive lists as appropriate.
1613          */
1614         spin_lock(&dev_priv->mm.active_list_lock);
1615         while (!list_empty(&dev_priv->mm.active_list)) {
1616                 struct drm_gem_object *obj;
1617                 struct drm_i915_gem_object *obj_priv;
1618
1619                 obj_priv = list_first_entry(&dev_priv->mm.active_list,
1620                                             struct drm_i915_gem_object,
1621                                             list);
1622                 obj = obj_priv->obj;
1623
1624                 /* If the seqno being retired doesn't match the oldest in the
1625                  * list, then the oldest in the list must still be newer than
1626                  * this seqno.
1627                  */
1628                 if (obj_priv->last_rendering_seqno != request->seqno)
1629                         goto out;
1630
1631 #if WATCH_LRU
1632                 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1633                          __func__, request->seqno, obj);
1634 #endif
1635
1636                 if (obj->write_domain != 0)
1637                         i915_gem_object_move_to_flushing(obj);
1638                 else {
1639                         /* Take a reference on the object so it won't be
1640                          * freed while the spinlock is held.  The list
1641                          * protection for this spinlock is safe when breaking
1642                          * the lock like this since the next thing we do
1643                          * is just get the head of the list again.
1644                          */
1645                         drm_gem_object_reference(obj);
1646                         i915_gem_object_move_to_inactive(obj);
1647                         spin_unlock(&dev_priv->mm.active_list_lock);
1648                         drm_gem_object_unreference(obj);
1649                         spin_lock(&dev_priv->mm.active_list_lock);
1650                 }
1651         }
1652 out:
1653         spin_unlock(&dev_priv->mm.active_list_lock);
1654 }
1655
/**
 * Returns true if seq1 is at or after seq2.
 *
 * The comparison is done on the signed 32-bit difference, so it stays
 * correct when the sequence counter wraps around.
 */
static int
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	const int32_t distance = (int32_t)(seq1 - seq2);

	return distance >= 0;
}
1664
1665 uint32_t
1666 i915_get_gem_seqno(struct drm_device *dev)
1667 {
1668         drm_i915_private_t *dev_priv = dev->dev_private;
1669
1670         return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
1671 }
1672
1673 /**
1674  * This function clears the request list as sequence numbers are passed.
1675  */
1676 void
1677 i915_gem_retire_requests(struct drm_device *dev)
1678 {
1679         drm_i915_private_t *dev_priv = dev->dev_private;
1680         uint32_t seqno;
1681
1682         if (!dev_priv->hw_status_page)
1683                 return;
1684
1685         seqno = i915_get_gem_seqno(dev);
1686
1687         while (!list_empty(&dev_priv->mm.request_list)) {
1688                 struct drm_i915_gem_request *request;
1689                 uint32_t retiring_seqno;
1690
1691                 request = list_first_entry(&dev_priv->mm.request_list,
1692                                            struct drm_i915_gem_request,
1693                                            list);
1694                 retiring_seqno = request->seqno;
1695
1696                 if (i915_seqno_passed(seqno, retiring_seqno) ||
1697                     dev_priv->mm.wedged) {
1698                         i915_gem_retire_request(dev, request);
1699
1700                         list_del(&request->list);
1701                         list_del(&request->client_list);
1702                         kfree(request);
1703                 } else
1704                         break;
1705         }
1706 }
1707
1708 void
1709 i915_gem_retire_work_handler(struct work_struct *work)
1710 {
1711         drm_i915_private_t *dev_priv;
1712         struct drm_device *dev;
1713
1714         dev_priv = container_of(work, drm_i915_private_t,
1715                                 mm.retire_work.work);
1716         dev = dev_priv->dev;
1717
1718         mutex_lock(&dev->struct_mutex);
1719         i915_gem_retire_requests(dev);
1720         if (!dev_priv->mm.suspended &&
1721             !list_empty(&dev_priv->mm.request_list))
1722                 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1723         mutex_unlock(&dev->struct_mutex);
1724 }
1725
1726 /**
1727  * Waits for a sequence number to be signaled, and cleans up the
1728  * request and object lists appropriately for that event.
1729  */
1730 static int
1731 i915_wait_request(struct drm_device *dev, uint32_t seqno)
1732 {
1733         drm_i915_private_t *dev_priv = dev->dev_private;
1734         u32 ier;
1735         int ret = 0;
1736
1737         BUG_ON(seqno == 0);
1738
1739         if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
1740                 if (IS_IGDNG(dev))
1741                         ier = I915_READ(DEIER) | I915_READ(GTIER);
1742                 else
1743                         ier = I915_READ(IER);
1744                 if (!ier) {
1745                         DRM_ERROR("something (likely vbetool) disabled "
1746                                   "interrupts, re-enabling\n");
1747                         i915_driver_irq_preinstall(dev);
1748                         i915_driver_irq_postinstall(dev);
1749                 }
1750
1751                 dev_priv->mm.waiting_gem_seqno = seqno;
1752                 i915_user_irq_get(dev);
1753                 ret = wait_event_interruptible(dev_priv->irq_queue,
1754                                                i915_seqno_passed(i915_get_gem_seqno(dev),
1755                                                                  seqno) ||
1756                                                dev_priv->mm.wedged);
1757                 i915_user_irq_put(dev);
1758                 dev_priv->mm.waiting_gem_seqno = 0;
1759         }
1760         if (dev_priv->mm.wedged)
1761                 ret = -EIO;
1762
1763         if (ret && ret != -ERESTARTSYS)
1764                 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1765                           __func__, ret, seqno, i915_get_gem_seqno(dev));
1766
1767         /* Directly dispatch request retiring.  While we have the work queue
1768          * to handle this, the waiter on a request often wants an associated
1769          * buffer to have made it to the inactive list, and we would need
1770          * a separate wait queue to handle that.
1771          */
1772         if (ret == 0)
1773                 i915_gem_retire_requests(dev);
1774
1775         return ret;
1776 }
1777
1778 static void
1779 i915_gem_flush(struct drm_device *dev,
1780                uint32_t invalidate_domains,
1781                uint32_t flush_domains)
1782 {
1783         drm_i915_private_t *dev_priv = dev->dev_private;
1784         uint32_t cmd;
1785         RING_LOCALS;
1786
1787 #if WATCH_EXEC
1788         DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
1789                   invalidate_domains, flush_domains);
1790 #endif
1791
1792         if (flush_domains & I915_GEM_DOMAIN_CPU)
1793                 drm_agp_chipset_flush(dev);
1794
1795         if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
1796                 /*
1797                  * read/write caches:
1798                  *
1799                  * I915_GEM_DOMAIN_RENDER is always invalidated, but is
1800                  * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
1801                  * also flushed at 2d versus 3d pipeline switches.
1802                  *
1803                  * read-only caches:
1804                  *
1805                  * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
1806                  * MI_READ_FLUSH is set, and is always flushed on 965.
1807                  *
1808                  * I915_GEM_DOMAIN_COMMAND may not exist?
1809                  *
1810                  * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
1811                  * invalidated when MI_EXE_FLUSH is set.
1812                  *
1813                  * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
1814                  * invalidated with every MI_FLUSH.
1815                  *
1816                  * TLBs:
1817                  *
1818                  * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
1819                  * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
1820                  * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
1821                  * are flushed at any MI_FLUSH.
1822                  */
1823
1824                 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1825                 if ((invalidate_domains|flush_domains) &
1826                     I915_GEM_DOMAIN_RENDER)
1827                         cmd &= ~MI_NO_WRITE_FLUSH;
1828                 if (!IS_I965G(dev)) {
1829                         /*
1830                          * On the 965, the sampler cache always gets flushed
1831                          * and this bit is reserved.
1832                          */
1833                         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
1834                                 cmd |= MI_READ_FLUSH;
1835                 }
1836                 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
1837                         cmd |= MI_EXE_FLUSH;
1838
1839 #if WATCH_EXEC
1840                 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
1841 #endif
1842                 BEGIN_LP_RING(2);
1843                 OUT_RING(cmd);
1844                 OUT_RING(0); /* noop */
1845                 ADVANCE_LP_RING();
1846         }
1847 }
1848
1849 /**
1850  * Ensures that all rendering to the object has completed and the object is
1851  * safe to unbind from the GTT or access from the CPU.
1852  */
1853 static int
1854 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1855 {
1856         struct drm_device *dev = obj->dev;
1857         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1858         int ret;
1859
1860         /* This function only exists to support waiting for existing rendering,
1861          * not for emitting required flushes.
1862          */
1863         BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1864
1865         /* If there is rendering queued on the buffer being evicted, wait for
1866          * it.
1867          */
1868         if (obj_priv->active) {
1869 #if WATCH_BUF
1870                 DRM_INFO("%s: object %p wait for seqno %08x\n",
1871                           __func__, obj, obj_priv->last_rendering_seqno);
1872 #endif
1873                 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
1874                 if (ret != 0)
1875                         return ret;
1876         }
1877
1878         return 0;
1879 }
1880
1881 /**
1882  * Unbinds an object from the GTT aperture.
1883  */
1884 int
1885 i915_gem_object_unbind(struct drm_gem_object *obj)
1886 {
1887         struct drm_device *dev = obj->dev;
1888         struct drm_i915_gem_object *obj_priv = obj->driver_private;
1889         int ret = 0;
1890
1891 #if WATCH_BUF
1892         DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1893         DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1894 #endif
1895         if (obj_priv->gtt_space == NULL)
1896                 return 0;
1897
1898         if (obj_priv->pin_count != 0) {
1899                 DRM_ERROR("Attempting to unbind pinned buffer\n");
1900                 return -EINVAL;
1901         }
1902
1903         /* Move the object to the CPU domain to ensure that
1904          * any possible CPU writes while it's not in the GTT
1905          * are flushed when we go to remap it. This will
1906          * also ensure that all pending GPU writes are finished
1907          * before we unbind.
1908          */
1909         ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1910         if (ret) {
1911                 if (ret != -ERESTARTSYS)
1912                         DRM_ERROR("set_domain failed: %d\n", ret);
1913                 return ret;
1914         }
1915
1916         if (obj_priv->agp_mem != NULL) {
1917                 drm_unbind_agp(obj_priv->agp_mem);
1918                 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1919                 obj_priv->agp_mem = NULL;
1920         }
1921
1922         BUG_ON(obj_priv->active);
1923
1924         /* blow away mappings if mapped through GTT */
1925         i915_gem_release_mmap(obj);
1926
1927         if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1928                 i915_gem_clear_fence_reg(obj);
1929
1930         i915_gem_object_put_pages(obj);
1931
1932         if (obj_priv->gtt_space) {
1933                 atomic_dec(&dev->gtt_count);
1934                 atomic_sub(obj->size, &dev->gtt_memory);
1935
1936                 drm_mm_put_block(obj_priv->gtt_space);
1937                 obj_priv->gtt_space = NULL;
1938         }
1939
1940         /* Remove ourselves from the LRU list if present. */
1941         if (!list_empty(&obj_priv->list))
1942                 list_del_init(&obj_priv->list);
1943
1944         return 0;
1945 }
1946
1947 static int
1948 i915_gem_evict_something(struct drm_device *dev)
1949 {
1950         drm_i915_private_t *dev_priv = dev->dev_private;
1951         struct drm_gem_object *obj;
1952         struct drm_i915_gem_object *obj_priv;
1953         int ret = 0;
1954
1955         for (;;) {
1956                 /* If there's an inactive buffer available now, grab it
1957                  * and be done.
1958                  */
1959                 if (!list_empty(&dev_priv->mm.inactive_list)) {
1960                         obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
1961                                                     struct drm_i915_gem_object,
1962                                                     list);
1963                         obj = obj_priv->obj;
1964                         BUG_ON(obj_priv->pin_count != 0);
1965 #if WATCH_LRU
1966                         DRM_INFO("%s: evicting %p\n", __func__, obj);
1967 #endif
1968                         BUG_ON(obj_priv->active);
1969
1970                         /* Wait on the rendering and unbind the buffer. */
1971                         ret = i915_gem_object_unbind(obj);
1972                         break;
1973                 }
1974
1975                 /* If we didn't get anything, but the ring is still processing
1976                  * things, wait for one of those things to finish and hopefully
1977                  * leave us a buffer to evict.
1978                  */
1979                 if (!list_empty(&dev_priv->mm.request_list)) {
1980                         struct drm_i915_gem_request *request;
1981
1982                         request = list_first_entry(&dev_priv->mm.request_list,
1983                                                    struct drm_i915_gem_request,
1984                                                    list);
1985
1986                         ret = i915_wait_request(dev, request->seqno);
1987                         if (ret)
1988                                 break;
1989
1990                         /* if waiting caused an object to become inactive,
1991                          * then loop around and wait for it. Otherwise, we
1992                          * assume that waiting freed and unbound something,
1993                          * so there should now be some space in the GTT
1994                          */
1995                         if (!list_empty(&dev_priv->mm.inactive_list))
1996                                 continue;
1997                         break;
1998                 }
1999
2000                 /* If we didn't have anything on the request list but there
2001                  * are buffers awaiting a flush, emit one and try again.
2002                  * When we wait on it, those buffers waiting for that flush
2003                  * will get moved to inactive.
2004                  */
2005                 if (!list_empty(&dev_priv->mm.flushing_list)) {
2006                         obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
2007                                                     struct drm_i915_gem_object,
2008                                                     list);
2009                         obj = obj_priv->obj;
2010
2011                         i915_gem_flush(dev,
2012                                        obj->write_domain,
2013                                        obj->write_domain);
2014                         i915_add_request(dev, NULL, obj->write_domain);
2015
2016                         obj = NULL;
2017                         continue;
2018                 }
2019
2020                 DRM_ERROR("inactive empty %d request empty %d "
2021                           "flushing empty %d\n",
2022                           list_empty(&dev_priv->mm.inactive_list),
2023                           list_empty(&dev_priv->mm.request_list),
2024                           list_empty(&dev_priv->mm.flushing_list));
2025                 /* If we didn't do any of the above, there's nothing to be done
2026                  * and we just can't fit it in.
2027                  */
2028                 return -ENOSPC;
2029         }
2030         return ret;
2031 }
2032
2033 static int
2034 i915_gem_evict_everything(struct drm_device *dev)
2035 {
2036         int ret;
2037
2038         for (;;) {
2039                 ret = i915_gem_evict_something(dev);
2040                 if (ret != 0)
2041                         break;
2042         }
2043         if (ret == -ENOSPC)
2044                 return 0;
2045         return ret;
2046 }
2047
2048 int
2049 i915_gem_object_get_pages(struct drm_gem_object *obj)
2050 {
2051         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2052         int page_count, i;
2053         struct address_space *mapping;
2054         struct inode *inode;
2055         struct page *page;
2056         int ret;
2057
2058         if (obj_priv->pages_refcount++ != 0)
2059                 return 0;
2060
2061         /* Get the list of pages out of our struct file.  They'll be pinned
2062          * at this point until we release them.
2063          */
2064         page_count = obj->size / PAGE_SIZE;
2065         BUG_ON(obj_priv->pages != NULL);
2066         obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2067         if (obj_priv->pages == NULL) {
2068                 DRM_ERROR("Faled to allocate page list\n");
2069                 obj_priv->pages_refcount--;
2070                 return -ENOMEM;
2071         }
2072
2073         inode = obj->filp->f_path.dentry->d_inode;
2074         mapping = inode->i_mapping;
2075         for (i = 0; i < page_count; i++) {
2076                 page = read_mapping_page(mapping, i, NULL);
2077                 if (IS_ERR(page)) {
2078                         ret = PTR_ERR(page);
2079                         DRM_ERROR("read_mapping_page failed: %d\n", ret);
2080                         i915_gem_object_put_pages(obj);
2081                         return ret;
2082                 }
2083                 obj_priv->pages[i] = page;
2084         }
2085
2086         if (obj_priv->tiling_mode != I915_TILING_NONE)
2087                 i915_gem_object_do_bit_17_swizzle(obj);
2088
2089         return 0;
2090 }
2091
2092 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2093 {
2094         struct drm_gem_object *obj = reg->obj;
2095         struct drm_device *dev = obj->dev;
2096         drm_i915_private_t *dev_priv = dev->dev_private;
2097         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2098         int regnum = obj_priv->fence_reg;
2099         uint64_t val;
2100
2101         val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2102                     0xfffff000) << 32;
2103         val |= obj_priv->gtt_offset & 0xfffff000;
2104         val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2105         if (obj_priv->tiling_mode == I915_TILING_Y)
2106                 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2107         val |= I965_FENCE_REG_VALID;
2108
2109         I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2110 }
2111
2112 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2113 {
2114         struct drm_gem_object *obj = reg->obj;
2115         struct drm_device *dev = obj->dev;
2116         drm_i915_private_t *dev_priv = dev->dev_private;
2117         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2118         int regnum = obj_priv->fence_reg;
2119         int tile_width;
2120         uint32_t fence_reg, val;
2121         uint32_t pitch_val;
2122
2123         if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2124             (obj_priv->gtt_offset & (obj->size - 1))) {
2125                 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2126                      __func__, obj_priv->gtt_offset, obj->size);
2127                 return;
2128         }
2129
2130         if (obj_priv->tiling_mode == I915_TILING_Y &&
2131             HAS_128_BYTE_Y_TILING(dev))
2132                 tile_width = 128;
2133         else
2134                 tile_width = 512;
2135
2136         /* Note: pitch better be a power of two tile widths */
2137         pitch_val = obj_priv->stride / tile_width;
2138         pitch_val = ffs(pitch_val) - 1;
2139
2140         val = obj_priv->gtt_offset;
2141         if (obj_priv->tiling_mode == I915_TILING_Y)
2142                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2143         val |= I915_FENCE_SIZE_BITS(obj->size);
2144         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2145         val |= I830_FENCE_REG_VALID;
2146
2147         if (regnum < 8)
2148                 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2149         else
2150                 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2151         I915_WRITE(fence_reg, val);
2152 }
2153
2154 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2155 {
2156         struct drm_gem_object *obj = reg->obj;
2157         struct drm_device *dev = obj->dev;
2158         drm_i915_private_t *dev_priv = dev->dev_private;
2159         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2160         int regnum = obj_priv->fence_reg;
2161         uint32_t val;
2162         uint32_t pitch_val;
2163         uint32_t fence_size_bits;
2164
2165         if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2166             (obj_priv->gtt_offset & (obj->size - 1))) {
2167                 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2168                      __func__, obj_priv->gtt_offset);
2169                 return;
2170         }
2171
2172         pitch_val = obj_priv->stride / 128;
2173         pitch_val = ffs(pitch_val) - 1;
2174         WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2175
2176         val = obj_priv->gtt_offset;
2177         if (obj_priv->tiling_mode == I915_TILING_Y)
2178                 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2179         fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2180         WARN_ON(fence_size_bits & ~0x00000f00);
2181         val |= fence_size_bits;
2182         val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2183         val |= I830_FENCE_REG_VALID;
2184
2185         I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2186 }
2187
2188 /**
2189  * i915_gem_object_get_fence_reg - set up a fence reg for an object
2190  * @obj: object to map through a fence reg
2191  *
2192  * When mapping objects through the GTT, userspace wants to be able to write
2193  * to them without having to worry about swizzling if the object is tiled.
2194  *
2195  * This function walks the fence regs looking for a free one for @obj,
2196  * stealing one if it can't find any.
2197  *
2198  * It then sets up the reg based on the object's properties: address, pitch
2199  * and tiling format.
2200  */
2201 int
2202 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2203 {
2204         struct drm_device *dev = obj->dev;
2205         struct drm_i915_private *dev_priv = dev->dev_private;
2206         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2207         struct drm_i915_fence_reg *reg = NULL;
2208         struct drm_i915_gem_object *old_obj_priv = NULL;
2209         int i, ret, avail;
2210
2211         switch (obj_priv->tiling_mode) {
2212         case I915_TILING_NONE:
2213                 WARN(1, "allocating a fence for non-tiled object?\n");
2214                 break;
2215         case I915_TILING_X:
2216                 if (!obj_priv->stride)
2217                         return -EINVAL;
2218                 WARN((obj_priv->stride & (512 - 1)),
2219                      "object 0x%08x is X tiled but has non-512B pitch\n",
2220                      obj_priv->gtt_offset);
2221                 break;
2222         case I915_TILING_Y:
2223                 if (!obj_priv->stride)
2224                         return -EINVAL;
2225                 WARN((obj_priv->stride & (128 - 1)),
2226                      "object 0x%08x is Y tiled but has non-128B pitch\n",
2227                      obj_priv->gtt_offset);
2228                 break;
2229         }
2230
2231         /* First try to find a free reg */
2232 try_again:
2233         avail = 0;
2234         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2235                 reg = &dev_priv->fence_regs[i];
2236                 if (!reg->obj)
2237                         break;
2238
2239                 old_obj_priv = reg->obj->driver_private;
2240                 if (!old_obj_priv->pin_count)
2241                     avail++;
2242         }
2243
2244         /* None available, try to steal one or wait for a user to finish */
2245         if (i == dev_priv->num_fence_regs) {
2246                 uint32_t seqno = dev_priv->mm.next_gem_seqno;
2247
2248                 if (avail == 0)
2249                         return -ENOSPC;
2250
2251                 for (i = dev_priv->fence_reg_start;
2252                      i < dev_priv->num_fence_regs; i++) {
2253                         uint32_t this_seqno;
2254
2255                         reg = &dev_priv->fence_regs[i];
2256                         old_obj_priv = reg->obj->driver_private;
2257
2258                         if (old_obj_priv->pin_count)
2259                                 continue;
2260
2261                         /* i915 uses fences for GPU access to tiled buffers */
2262                         if (IS_I965G(dev) || !old_obj_priv->active)
2263                                 break;
2264
2265                         /* find the seqno of the first available fence */
2266                         this_seqno = old_obj_priv->last_rendering_seqno;
2267                         if (this_seqno != 0 &&
2268                             reg->obj->write_domain == 0 &&
2269                             i915_seqno_passed(seqno, this_seqno))
2270                                 seqno = this_seqno;
2271                 }
2272
2273                 /*
2274                  * Now things get ugly... we have to wait for one of the
2275                  * objects to finish before trying again.
2276                  */
2277                 if (i == dev_priv->num_fence_regs) {
2278                         if (seqno == dev_priv->mm.next_gem_seqno) {
2279                                 i915_gem_flush(dev,
2280                                                I915_GEM_GPU_DOMAINS,
2281                                                I915_GEM_GPU_DOMAINS);
2282                                 seqno = i915_add_request(dev, NULL,
2283                                                          I915_GEM_GPU_DOMAINS);
2284                                 if (seqno == 0)
2285                                         return -ENOMEM;
2286                         }
2287
2288                         ret = i915_wait_request(dev, seqno);
2289                         if (ret)
2290                                 return ret;
2291                         goto try_again;
2292                 }
2293
2294                 /*
2295                  * Zap this virtual mapping so we can set up a fence again
2296                  * for this object next time we need it.
2297                  */
2298                 i915_gem_release_mmap(reg->obj);
2299                 old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
2300         }
2301
2302         obj_priv->fence_reg = i;
2303         reg->obj = obj;
2304
2305         if (IS_I965G(dev))
2306                 i965_write_fence_reg(reg);
2307         else if (IS_I9XX(dev))
2308                 i915_write_fence_reg(reg);
2309         else
2310                 i830_write_fence_reg(reg);
2311
2312         return 0;
2313 }
2314
2315 /**
2316  * i915_gem_clear_fence_reg - clear out fence register info
2317  * @obj: object to clear
2318  *
2319  * Zeroes out the fence register itself and clears out the associated
2320  * data structures in dev_priv and obj_priv.
2321  */
2322 static void
2323 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2324 {
2325         struct drm_device *dev = obj->dev;
2326         drm_i915_private_t *dev_priv = dev->dev_private;
2327         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2328
2329         if (IS_I965G(dev))
2330                 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2331         else {
2332                 uint32_t fence_reg;
2333
2334                 if (obj_priv->fence_reg < 8)
2335                         fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2336                 else
2337                         fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2338                                                        8) * 4;
2339
2340                 I915_WRITE(fence_reg, 0);
2341         }
2342
2343         dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
2344         obj_priv->fence_reg = I915_FENCE_REG_NONE;
2345 }
2346
2347 /**
2348  * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2349  * to the buffer to finish, and then resets the fence register.
2350  * @obj: tiled object holding a fence register.
2351  *
2352  * Zeroes out the fence register itself and clears out the associated
2353  * data structures in dev_priv and obj_priv.
2354  */
2355 int
2356 i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2357 {
2358         struct drm_device *dev = obj->dev;
2359         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2360
2361         if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2362                 return 0;
2363
2364         /* On the i915, GPU access to tiled buffers is via a fence,
2365          * therefore we must wait for any outstanding access to complete
2366          * before clearing the fence.
2367          */
2368         if (!IS_I965G(dev)) {
2369                 int ret;
2370
2371                 i915_gem_object_flush_gpu_write_domain(obj);
2372                 i915_gem_object_flush_gtt_write_domain(obj);
2373                 ret = i915_gem_object_wait_rendering(obj);
2374                 if (ret != 0)
2375                         return ret;
2376         }
2377
2378         i915_gem_clear_fence_reg (obj);
2379
2380         return 0;
2381 }
2382
2383 /**
2384  * Finds free space in the GTT aperture and binds the object there.
2385  */
2386 static int
2387 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2388 {
2389         struct drm_device *dev = obj->dev;
2390         drm_i915_private_t *dev_priv = dev->dev_private;
2391         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2392         struct drm_mm_node *free_space;
2393         int page_count, ret;
2394
2395         if (dev_priv->mm.suspended)
2396                 return -EBUSY;
2397         if (alignment == 0)
2398                 alignment = i915_gem_get_gtt_alignment(obj);
2399         if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2400                 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2401                 return -EINVAL;
2402         }
2403
2404  search_free:
2405         free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2406                                         obj->size, alignment, 0);
2407         if (free_space != NULL) {
2408                 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2409                                                        alignment);
2410                 if (obj_priv->gtt_space != NULL) {
2411                         obj_priv->gtt_space->private = obj;
2412                         obj_priv->gtt_offset = obj_priv->gtt_space->start;
2413                 }
2414         }
2415         if (obj_priv->gtt_space == NULL) {
2416                 bool lists_empty;
2417
2418                 /* If the gtt is empty and we're still having trouble
2419                  * fitting our object in, we're out of memory.
2420                  */
2421 #if WATCH_LRU
2422                 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2423 #endif
2424                 spin_lock(&dev_priv->mm.active_list_lock);
2425                 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2426                                list_empty(&dev_priv->mm.flushing_list) &&
2427                                list_empty(&dev_priv->mm.active_list));
2428                 spin_unlock(&dev_priv->mm.active_list_lock);
2429                 if (lists_empty) {
2430                         DRM_ERROR("GTT full, but LRU list empty\n");
2431                         return -ENOSPC;
2432                 }
2433
2434                 ret = i915_gem_evict_something(dev);
2435                 if (ret != 0) {
2436                         if (ret != -ERESTARTSYS)
2437                                 DRM_ERROR("Failed to evict a buffer %d\n", ret);
2438                         return ret;
2439                 }
2440                 goto search_free;
2441         }
2442
2443 #if WATCH_BUF
2444         DRM_INFO("Binding object of size %zd at 0x%08x\n",
2445                  obj->size, obj_priv->gtt_offset);
2446 #endif
2447         ret = i915_gem_object_get_pages(obj);
2448         if (ret) {
2449                 drm_mm_put_block(obj_priv->gtt_space);
2450                 obj_priv->gtt_space = NULL;
2451                 return ret;
2452         }
2453
2454         page_count = obj->size / PAGE_SIZE;
2455         /* Create an AGP memory structure pointing at our pages, and bind it
2456          * into the GTT.
2457          */
2458         obj_priv->agp_mem = drm_agp_bind_pages(dev,
2459                                                obj_priv->pages,
2460                                                page_count,
2461                                                obj_priv->gtt_offset,
2462                                                obj_priv->agp_type);
2463         if (obj_priv->agp_mem == NULL) {
2464                 i915_gem_object_put_pages(obj);
2465                 drm_mm_put_block(obj_priv->gtt_space);
2466                 obj_priv->gtt_space = NULL;
2467                 return -ENOMEM;
2468         }
2469         atomic_inc(&dev->gtt_count);
2470         atomic_add(obj->size, &dev->gtt_memory);
2471
2472         /* Assert that the object is not currently in any GPU domain. As it
2473          * wasn't in the GTT, there shouldn't be any way it could have been in
2474          * a GPU cache
2475          */
2476         BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2477         BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2478
2479         return 0;
2480 }
2481
2482 void
2483 i915_gem_clflush_object(struct drm_gem_object *obj)
2484 {
2485         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
2486
2487         /* If we don't have a page list set up, then we're not pinned
2488          * to GPU, and we can ignore the cache flush because it'll happen
2489          * again at bind time.
2490          */
2491         if (obj_priv->pages == NULL)
2492                 return;
2493
2494         /* XXX: The 865 in particular appears to be weird in how it handles
2495          * cache flushing.  We haven't figured it out, but the
2496          * clflush+agp_chipset_flush doesn't appear to successfully get the
2497          * data visible to the PGU, while wbinvd + agp_chipset_flush does.
2498          */
2499         if (IS_I865G(obj->dev)) {
2500                 wbinvd();
2501                 return;
2502         }
2503
2504         drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2505 }
2506
2507 /** Flushes any GPU write domain for the object if it's dirty. */
2508 static void
2509 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2510 {
2511         struct drm_device *dev = obj->dev;
2512         uint32_t seqno;
2513
2514         if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2515                 return;
2516
2517         /* Queue the GPU write cache flushing we need. */
2518         i915_gem_flush(dev, 0, obj->write_domain);
2519         seqno = i915_add_request(dev, NULL, obj->write_domain);
2520         obj->write_domain = 0;
2521         i915_gem_object_move_to_active(obj, seqno);
2522 }
2523
2524 /** Flushes the GTT write domain for the object if it's dirty. */
2525 static void
2526 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2527 {
2528         if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2529                 return;
2530
2531         /* No actual flushing is required for the GTT write domain.   Writes
2532          * to it immediately go to main memory as far as we know, so there's
2533          * no chipset flush.  It also doesn't land in render cache.
2534          */
2535         obj->write_domain = 0;
2536 }
2537
2538 /** Flushes the CPU write domain for the object if it's dirty. */
2539 static void
2540 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2541 {
2542         struct drm_device *dev = obj->dev;
2543
2544         if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2545                 return;
2546
2547         i915_gem_clflush_object(obj);
2548         drm_agp_chipset_flush(dev);
2549         obj->write_domain = 0;
2550 }
2551
2552 /**
2553  * Moves a single object to the GTT read, and possibly write domain.
2554  *
2555  * This function returns when the move is complete, including waiting on
2556  * flushes to occur.
2557  */
2558 int
2559 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2560 {
2561         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2562         int ret;
2563
2564         /* Not valid to be called on unbound objects. */
2565         if (obj_priv->gtt_space == NULL)
2566                 return -EINVAL;
2567
2568         i915_gem_object_flush_gpu_write_domain(obj);
2569         /* Wait on any GPU rendering and flushing to occur. */
2570         ret = i915_gem_object_wait_rendering(obj);
2571         if (ret != 0)
2572                 return ret;
2573
2574         /* If we're writing through the GTT domain, then CPU and GPU caches
2575          * will need to be invalidated at next use.
2576          */
2577         if (write)
2578                 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2579
2580         i915_gem_object_flush_cpu_write_domain(obj);
2581
2582         /* It should now be out of any other write domains, and we can update
2583          * the domain values for our changes.
2584          */
2585         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2586         obj->read_domains |= I915_GEM_DOMAIN_GTT;
2587         if (write) {
2588                 obj->write_domain = I915_GEM_DOMAIN_GTT;
2589                 obj_priv->dirty = 1;
2590         }
2591
2592         return 0;
2593 }
2594
2595 /**
2596  * Moves a single object to the CPU read, and possibly write domain.
2597  *
2598  * This function returns when the move is complete, including waiting on
2599  * flushes to occur.
2600  */
2601 static int
2602 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2603 {
2604         int ret;
2605
2606         i915_gem_object_flush_gpu_write_domain(obj);
2607         /* Wait on any GPU rendering and flushing to occur. */
2608         ret = i915_gem_object_wait_rendering(obj);
2609         if (ret != 0)
2610                 return ret;
2611
2612         i915_gem_object_flush_gtt_write_domain(obj);
2613
2614         /* If we have a partially-valid cache of the object in the CPU,
2615          * finish invalidating it and free the per-page flags.
2616          */
2617         i915_gem_object_set_to_full_cpu_read_domain(obj);
2618
2619         /* Flush the CPU cache if it's still invalid. */
2620         if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2621                 i915_gem_clflush_object(obj);
2622
2623                 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2624         }
2625
2626         /* It should now be out of any other write domains, and we can update
2627          * the domain values for our changes.
2628          */
2629         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2630
2631         /* If we're writing through the CPU, then the GPU read domains will
2632          * need to be invalidated at next use.
2633          */
2634         if (write) {
2635                 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2636                 obj->write_domain = I915_GEM_DOMAIN_CPU;
2637         }
2638
2639         return 0;
2640 }
2641
2642 /*
2643  * Set the next domain for the specified object. This
2644  * may not actually perform the necessary flushing/invalidating though,
2645  * as that may want to be batched with other set_domain operations.
2646  *
2647  * This is (we hope) the only really tricky part of gem. The goal
2648  * is fairly simple -- track which caches hold bits of the object
2649  * and make sure they remain coherent. A few concrete examples may
2650  * help to explain how it works. For shorthand, we use the notation
2651  * (read_domains, write_domain), e.g. (CPU, CPU) to indicate a
2652  * pair of read and write domain masks.
2653  *
2654  * Case 1: the batch buffer
2655  *
2656  *      1. Allocated
2657  *      2. Written by CPU
2658  *      3. Mapped to GTT
2659  *      4. Read by GPU
2660  *      5. Unmapped from GTT
2661  *      6. Freed
2662  *
2663  *      Let's take these a step at a time
2664  *
2665  *      1. Allocated
2666  *              Pages allocated from the kernel may still have
2667  *              cache contents, so we set them to (CPU, CPU) always.
2668  *      2. Written by CPU (using pwrite)
2669  *              The pwrite function calls set_domain (CPU, CPU) and
2670  *              this function does nothing (as nothing changes)
2671  *      3. Mapped to GTT
2672  *              This function asserts that the object is not
2673  *              currently in any GPU-based read or write domains
2674  *      4. Read by GPU
2675  *              i915_gem_execbuffer calls set_domain (COMMAND, 0).
2676  *              As write_domain is zero, this function adds in the
2677  *              current read domains (CPU+COMMAND, 0).
2678  *              flush_domains is set to CPU.
2679  *              invalidate_domains is set to COMMAND
2680  *              clflush is run to get data out of the CPU caches
2681  *              then i915_dev_set_domain calls i915_gem_flush to
2682  *              emit an MI_FLUSH and drm_agp_chipset_flush
2683  *      5. Unmapped from GTT
2684  *              i915_gem_object_unbind calls set_domain (CPU, CPU)
2685  *              flush_domains and invalidate_domains end up both zero
2686  *              so no flushing/invalidating happens
2687  *      6. Freed
2688  *              yay, done
2689  *
2690  * Case 2: The shared render buffer
2691  *
2692  *      1. Allocated
2693  *      2. Mapped to GTT
2694  *      3. Read/written by GPU
2695  *      4. set_domain to (CPU,CPU)
2696  *      5. Read/written by CPU
2697  *      6. Read/written by GPU
2698  *
2699  *      1. Allocated
2700  *              Same as last example, (CPU, CPU)
2701  *      2. Mapped to GTT
2702  *              Nothing changes (assertions find that it is not in the GPU)
2703  *      3. Read/written by GPU
2704  *              execbuffer calls set_domain (RENDER, RENDER)
2705  *              flush_domains gets CPU
2706  *              invalidate_domains gets GPU
2707  *              clflush (obj)
2708  *              MI_FLUSH and drm_agp_chipset_flush
2709  *      4. set_domain (CPU, CPU)
2710  *              flush_domains gets GPU
2711  *              invalidate_domains gets CPU
2712  *              wait_rendering (obj) to make sure all drawing is complete.
2713  *              This will include an MI_FLUSH to get the data from GPU
2714  *              to memory
2715  *              clflush (obj) to invalidate the CPU cache
2716  *              Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2717  *      5. Read/written by CPU
2718  *              cache lines are loaded and dirtied
2719  *      6. Read/written by GPU
2720  *              Same as last GPU access
2721  *
2722  * Case 3: The constant buffer
2723  *
2724  *      1. Allocated
2725  *      2. Written by CPU
2726  *      3. Read by GPU
2727  *      4. Updated (written) by CPU again
2728  *      5. Read by GPU
2729  *
2730  *      1. Allocated
2731  *              (CPU, CPU)
2732  *      2. Written by CPU
2733  *              (CPU, CPU)
2734  *      3. Read by GPU
2735  *              (CPU+RENDER, 0)
2736  *              flush_domains = CPU
2737  *              invalidate_domains = RENDER
2738  *              clflush (obj)
2739  *              MI_FLUSH
2740  *              drm_agp_chipset_flush
2741  *      4. Updated (written) by CPU again
2742  *              (CPU, CPU)
2743  *              flush_domains = 0 (no previous write domain)
2744  *              invalidate_domains = 0 (no new read domains)
2745  *      5. Read by GPU
2746  *              (CPU+RENDER, 0)
2747  *              flush_domains = CPU
2748  *              invalidate_domains = RENDER
2749  *              clflush (obj)
2750  *              MI_FLUSH
2751  *              drm_agp_chipset_flush
2752  */
2753 static void
2754 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
2755 {
2756         struct drm_device               *dev = obj->dev;
2757         struct drm_i915_gem_object      *obj_priv = obj->driver_private;
2758         uint32_t                        invalidate_domains = 0;
2759         uint32_t                        flush_domains = 0;
2760
2761         BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2762         BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
2763
2764 #if WATCH_BUF
2765         DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2766                  __func__, obj,
2767                  obj->read_domains, obj->pending_read_domains,
2768                  obj->write_domain, obj->pending_write_domain);
2769 #endif
2770         /*
2771          * If the object isn't moving to a new write domain,
2772          * let the object stay in multiple read domains
2773          */
2774         if (obj->pending_write_domain == 0)
2775                 obj->pending_read_domains |= obj->read_domains;
2776         else
2777                 obj_priv->dirty = 1;
2778
2779         /*
2780          * Flush the current write domain if
2781          * the new read domains don't match. Invalidate
2782          * any read domains which differ from the old
2783          * write domain
2784          */
2785         if (obj->write_domain &&
2786             obj->write_domain != obj->pending_read_domains) {
2787                 flush_domains |= obj->write_domain;
2788                 invalidate_domains |=
2789                         obj->pending_read_domains & ~obj->write_domain;
2790         }
2791         /*
2792          * Invalidate any read caches which may have
2793          * stale data. That is, any new read domains.
2794          */
2795         invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
2796         if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
2797 #if WATCH_BUF
2798                 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
2799                          __func__, flush_domains, invalidate_domains);
2800 #endif
2801                 i915_gem_clflush_object(obj);
2802         }
2803
2804         /* The actual obj->write_domain will be updated with
2805          * pending_write_domain after we emit the accumulated flush for all
2806          * of our domain changes in execbuffers (which clears objects'
2807          * write_domains).  So if we have a current write domain that we
2808          * aren't changing, set pending_write_domain to that.
2809          */
2810         if (flush_domains == 0 && obj->pending_write_domain == 0)
2811                 obj->pending_write_domain = obj->write_domain;
2812         obj->read_domains = obj->pending_read_domains;
2813
2814         dev->invalidate_domains |= invalidate_domains;
2815         dev->flush_domains |= flush_domains;
2816 #if WATCH_BUF
2817         DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
2818                  __func__,
2819                  obj->read_domains, obj->write_domain,
2820                  dev->invalidate_domains, dev->flush_domains);
2821 #endif
2822 }
2823
2824 /**
2825  * Moves the object from a partially CPU read to a full one.
2826  *
2827  * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
2828  * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
2829  */
2830 static void
2831 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
2832 {
2833         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2834
2835         if (!obj_priv->page_cpu_valid)
2836                 return;
2837
2838         /* If we're partially in the CPU read domain, finish moving it in.
2839          */
2840         if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
2841                 int i;
2842
2843                 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
2844                         if (obj_priv->page_cpu_valid[i])
2845                                 continue;
2846                         drm_clflush_pages(obj_priv->pages + i, 1);
2847                 }
2848         }
2849
2850         /* Free the page_cpu_valid mappings which are now stale, whether
2851          * or not we've got I915_GEM_DOMAIN_CPU.
2852          */
2853         kfree(obj_priv->page_cpu_valid);
2854         obj_priv->page_cpu_valid = NULL;
2855 }
2856
2857 /**
2858  * Set the CPU read domain on a range of the object.
2859  *
2860  * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
2861  * not entirely valid.  The page_cpu_valid member of the object flags which
2862  * pages have been flushed, and will be respected by
2863  * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
2864  * of the whole object.
2865  *
2866  * This function returns when the move is complete, including waiting on
2867  * flushes to occur.
2868  */
2869 static int
2870 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
2871                                           uint64_t offset, uint64_t size)
2872 {
2873         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2874         int i, ret;
2875
2876         if (offset == 0 && size == obj->size)
2877                 return i915_gem_object_set_to_cpu_domain(obj, 0);
2878
2879         i915_gem_object_flush_gpu_write_domain(obj);
2880         /* Wait on any GPU rendering and flushing to occur. */
2881         ret = i915_gem_object_wait_rendering(obj);
2882         if (ret != 0)
2883                 return ret;
2884         i915_gem_object_flush_gtt_write_domain(obj);
2885
2886         /* If we're already fully in the CPU read domain, we're done. */
2887         if (obj_priv->page_cpu_valid == NULL &&
2888             (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
2889                 return 0;
2890
2891         /* Otherwise, create/clear the per-page CPU read domain flag if we're
2892          * newly adding I915_GEM_DOMAIN_CPU
2893          */
2894         if (obj_priv->page_cpu_valid == NULL) {
2895                 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
2896                                                    GFP_KERNEL);
2897                 if (obj_priv->page_cpu_valid == NULL)
2898                         return -ENOMEM;
2899         } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
2900                 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
2901
2902         /* Flush the cache on any pages that are still invalid from the CPU's
2903          * perspective.
2904          */
2905         for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
2906              i++) {
2907                 if (obj_priv->page_cpu_valid[i])
2908                         continue;
2909
2910                 drm_clflush_pages(obj_priv->pages + i, 1);
2911
2912                 obj_priv->page_cpu_valid[i] = 1;
2913         }
2914
2915         /* It should now be out of any other write domains, and we can update
2916          * the domain values for our changes.
2917          */
2918         BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2919
2920         obj->read_domains |= I915_GEM_DOMAIN_CPU;
2921
2922         return 0;
2923 }
2924
2925 /**
2926  * Pin an object to the GTT and evaluate the relocations landing in it.
2927  */
2928 static int
2929 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
2930                                  struct drm_file *file_priv,
2931                                  struct drm_i915_gem_exec_object *entry,
2932                                  struct drm_i915_gem_relocation_entry *relocs)
2933 {
2934         struct drm_device *dev = obj->dev;
2935         drm_i915_private_t *dev_priv = dev->dev_private;
2936         struct drm_i915_gem_object *obj_priv = obj->driver_private;
2937         int i, ret;
2938         void __iomem *reloc_page;
2939
2940         /* Choose the GTT offset for our buffer and put it there. */
2941         ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
2942         if (ret)
2943                 return ret;
2944
2945         entry->offset = obj_priv->gtt_offset;
2946
2947         /* Apply the relocations, using the GTT aperture to avoid cache
2948          * flushing requirements.
2949          */
2950         for (i = 0; i < entry->relocation_count; i++) {
2951                 struct drm_i915_gem_relocation_entry *reloc= &relocs[i];
2952                 struct drm_gem_object *target_obj;
2953                 struct drm_i915_gem_object *target_obj_priv;
2954                 uint32_t reloc_val, reloc_offset;
2955                 uint32_t __iomem *reloc_entry;
2956
2957                 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
2958                                                    reloc->target_handle);
2959                 if (target_obj == NULL) {
2960                         i915_gem_object_unpin(obj);
2961                         return -EBADF;
2962                 }
2963                 target_obj_priv = target_obj->driver_private;
2964
2965                 /* The target buffer should have appeared before us in the
2966                  * exec_object list, so it should have a GTT space bound by now.
2967                  */
2968                 if (target_obj_priv->gtt_space == NULL) {
2969                         DRM_ERROR("No GTT space found for object %d\n",
2970                                   reloc->target_handle);
2971                         drm_gem_object_unreference(target_obj);
2972                         i915_gem_object_unpin(obj);
2973                         return -EINVAL;
2974                 }
2975
2976                 if (reloc->offset > obj->size - 4) {
2977                         DRM_ERROR("Relocation beyond object bounds: "
2978                                   "obj %p target %d offset %d size %d.\n",
2979                                   obj, reloc->target_handle,
2980                                   (int) reloc->offset, (int) obj->size);
2981                         drm_gem_object_unreference(target_obj);
2982                         i915_gem_object_unpin(obj);
2983                         return -EINVAL;
2984                 }
2985                 if (reloc->offset & 3) {
2986                         DRM_ERROR("Relocation not 4-byte aligned: "
2987                                   "obj %p target %d offset %d.\n",
2988                                   obj, reloc->target_handle,
2989                                   (int) reloc->offset);
2990                         drm_gem_object_unreference(target_obj);
2991                         i915_gem_object_unpin(obj);
2992                         return -EINVAL;
2993                 }
2994
2995                 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
2996                     reloc->read_domains & I915_GEM_DOMAIN_CPU) {
2997                         DRM_ERROR("reloc with read/write CPU domains: "
2998                                   "obj %p target %d offset %d "
2999                                   "read %08x write %08x",
3000                                   obj, reloc->target_handle,
3001                                   (int) reloc->offset,
3002                                   reloc->read_domains,
3003                                   reloc->write_domain);
3004                         drm_gem_object_unreference(target_obj);
3005                         i915_gem_object_unpin(obj);
3006                         return -EINVAL;
3007                 }
3008
3009                 if (reloc->write_domain && target_obj->pending_write_domain &&
3010                     reloc->write_domain != target_obj->pending_write_domain) {
3011                         DRM_ERROR("Write domain conflict: "
3012                                   "obj %p target %d offset %d "
3013                                   "new %08x old %08x\n",
3014                                   obj, reloc->target_handle,
3015                                   (int) reloc->offset,
3016                                   reloc->write_domain,
3017                                   target_obj->pending_write_domain);
3018                         drm_gem_object_unreference(target_obj);
3019                         i915_gem_object_unpin(obj);
3020                         return -EINVAL;
3021                 }
3022
3023 #if WATCH_RELOC
3024                 DRM_INFO("%s: obj %p offset %08x target %d "
3025                          "read %08x write %08x gtt %08x "
3026                          "presumed %08x delta %08x\n",
3027                          __func__,
3028                          obj,
3029                          (int) reloc->offset,
3030                          (int) reloc->target_handle,
3031                          (int) reloc->read_domains,
3032                          (int) reloc->write_domain,
3033                          (int) target_obj_priv->gtt_offset,
3034                          (int) reloc->presumed_offset,
3035                          reloc->delta);
3036 #endif
3037
3038                 target_obj->pending_read_domains |= reloc->read_domains;
3039                 target_obj->pending_write_domain |= reloc->write_domain;
3040
3041                 /* If the relocation already has the right value in it, no
3042                  * more work needs to be done.
3043                  */
3044                 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3045                         drm_gem_object_unreference(target_obj);
3046                         continue;
3047                 }
3048
3049                 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3050                 if (ret != 0) {
3051                         drm_gem_object_unreference(target_obj);
3052                         i915_gem_object_unpin(obj);
3053                         return -EINVAL;
3054                 }
3055
3056                 /* Map the page containing the relocation we're going to
3057                  * perform.
3058                  */
3059                 reloc_offset = obj_priv->gtt_offset + reloc->offset;
3060                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3061                                                       (reloc_offset &
3062                                                        ~(PAGE_SIZE - 1)));
3063                 reloc_entry = (uint32_t __iomem *)(reloc_page +
3064                                                    (reloc_offset & (PAGE_SIZE - 1)));
3065                 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3066
3067 #if WATCH_BUF
3068                 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3069                           obj, (unsigned int) reloc->offset,
3070                           readl(reloc_entry), reloc_val);
3071 #endif
3072                 writel(reloc_val, reloc_entry);
3073                 io_mapping_unmap_atomic(reloc_page);
3074
3075                 /* The updated presumed offset for this entry will be
3076                  * copied back out to the user.
3077                  */
3078                 reloc->presumed_offset = target_obj_priv->gtt_offset;
3079
3080                 drm_gem_object_unreference(target_obj);
3081         }
3082
3083 #if WATCH_BUF
3084         if (0)
3085                 i915_gem_dump_object(obj, 128, __func__, ~0);
3086 #endif
3087         return 0;
3088 }
3089
3090 /** Dispatch a batchbuffer to the ring
3091  */
3092 static int
3093 i915_dispatch_gem_execbuffer(struct drm_device *dev,
3094                               struct drm_i915_gem_execbuffer *exec,
3095                               struct drm_clip_rect *cliprects,
3096                               uint64_t exec_offset)
3097 {
3098         drm_i915_private_t *dev_priv = dev->dev_private;
3099         int nbox = exec->num_cliprects;
3100         int i = 0, count;
3101         uint32_t exec_start, exec_len;
3102         RING_LOCALS;
3103
3104         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3105         exec_len = (uint32_t) exec->batch_len;
3106
3107         count = nbox ? nbox : 1;
3108
3109         for (i = 0; i < count; i++) {
3110                 if (i < nbox) {
3111                         int ret = i915_emit_box(dev, cliprects, i,
3112                                                 exec->DR1, exec->DR4);
3113                         if (ret)
3114                                 return ret;
3115                 }
3116
3117                 if (IS_I830(dev) || IS_845G(dev)) {
3118                         BEGIN_LP_RING(4);
3119                         OUT_RING(MI_BATCH_BUFFER);
3120                         OUT_RING(exec_start | MI_BATCH_NON_SECURE);
3121                         OUT_RING(exec_start + exec_len - 4);
3122                         OUT_RING(0);
3123                         ADVANCE_LP_RING();
3124                 } else {
3125                         BEGIN_LP_RING(2);
3126                         if (IS_I965G(dev)) {
3127                                 OUT_RING(MI_BATCH_BUFFER_START |
3128                                          (2 << 6) |
3129                                          MI_BATCH_NON_SECURE_I965);
3130                                 OUT_RING(exec_start);
3131                         } else {
3132                                 OUT_RING(MI_BATCH_BUFFER_START |
3133                                          (2 << 6));
3134                                 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
3135                         }
3136                         ADVANCE_LP_RING();
3137                 }
3138         }
3139
3140         /* XXX breadcrumb */
3141         return 0;
3142 }
3143
3144 /* Throttle our rendering by waiting until the ring has completed our requests
3145  * emitted over 20 msec ago.
3146  *
3147  * Note that if we were to use the current jiffies each time around the loop,
3148  * we wouldn't escape the function with any frames outstanding if the time to
3149  * render a frame was over 20ms.
3150  *
3151  * This should get us reasonable parallelism between CPU and GPU but also
3152  * relatively low latency when blocking on a particular request to finish.
3153  */
3154 static int
3155 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3156 {
3157         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3158         int ret = 0;
3159         unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3160
3161         mutex_lock(&dev->struct_mutex);
3162         while (!list_empty(&i915_file_priv->mm.request_list)) {
3163                 struct drm_i915_gem_request *request;
3164
3165                 request = list_first_entry(&i915_file_priv->mm.request_list,
3166                                            struct drm_i915_gem_request,
3167                                            client_list);
3168
3169                 if (time_after_eq(request->emitted_jiffies, recent_enough))
3170                         break;
3171
3172                 ret = i915_wait_request(dev, request->seqno);
3173                 if (ret != 0)
3174                         break;
3175         }
3176         mutex_unlock(&dev->struct_mutex);
3177
3178         return ret;
3179 }
3180
3181 static int
3182 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
3183                               uint32_t buffer_count,
3184                               struct drm_i915_gem_relocation_entry **relocs)
3185 {
3186         uint32_t reloc_count = 0, reloc_index = 0, i;
3187         int ret;
3188
3189         *relocs = NULL;
3190         for (i = 0; i < buffer_count; i++) {
3191                 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3192                         return -EINVAL;
3193                 reloc_count += exec_list[i].relocation_count;
3194         }
3195
3196         *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3197         if (*relocs == NULL)
3198                 return -ENOMEM;
3199
3200         for (i = 0; i < buffer_count; i++) {
3201                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3202
3203                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3204
3205                 ret = copy_from_user(&(*relocs)[reloc_index],
3206                                      user_relocs,
3207                                      exec_list[i].relocation_count *
3208                                      sizeof(**relocs));
3209                 if (ret != 0) {
3210                         drm_free_large(*relocs);
3211                         *relocs = NULL;
3212                         return -EFAULT;
3213                 }
3214
3215                 reloc_index += exec_list[i].relocation_count;
3216         }
3217
3218         return 0;
3219 }
3220
3221 static int
3222 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
3223                             uint32_t buffer_count,
3224                             struct drm_i915_gem_relocation_entry *relocs)
3225 {
3226         uint32_t reloc_count = 0, i;
3227         int ret = 0;
3228
3229         for (i = 0; i < buffer_count; i++) {
3230                 struct drm_i915_gem_relocation_entry __user *user_relocs;
3231                 int unwritten;
3232
3233                 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3234
3235                 unwritten = copy_to_user(user_relocs,
3236                                          &relocs[reloc_count],
3237                                          exec_list[i].relocation_count *
3238                                          sizeof(*relocs));
3239
3240                 if (unwritten) {
3241                         ret = -EFAULT;
3242                         goto err;
3243                 }
3244
3245                 reloc_count += exec_list[i].relocation_count;
3246         }
3247
3248 err:
3249         drm_free_large(relocs);
3250
3251         return ret;
3252 }
3253
3254 static int
3255 i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec,
3256                            uint64_t exec_offset)
3257 {
3258         uint32_t exec_start, exec_len;
3259
3260         exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3261         exec_len = (uint32_t) exec->batch_len;
3262
3263         if ((exec_start | exec_len) & 0x7)
3264                 return -EINVAL;
3265
3266         if (!exec_start)
3267                 return -EINVAL;
3268
3269         return 0;
3270 }
3271
3272 int
3273 i915_gem_execbuffer(struct drm_device *dev, void *data,
3274                     struct drm_file *file_priv)
3275 {
3276         drm_i915_private_t *dev_priv = dev->dev_private;
3277         struct drm_i915_gem_execbuffer *args = data;
3278         struct drm_i915_gem_exec_object *exec_list = NULL;
3279         struct drm_gem_object **object_list = NULL;
3280         struct drm_gem_object *batch_obj;
3281         struct drm_i915_gem_object *obj_priv;
3282         struct drm_clip_rect *cliprects = NULL;
3283         struct drm_i915_gem_relocation_entry *relocs;
3284         int ret, ret2, i, pinned = 0;
3285         uint64_t exec_offset;
3286         uint32_t seqno, flush_domains, reloc_index;
3287         int pin_tries;
3288
3289 #if WATCH_EXEC
3290         DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3291                   (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3292 #endif
3293
3294         if (args->buffer_count < 1) {
3295                 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3296                 return -EINVAL;
3297         }
3298         /* Copy in the exec list from userland */
3299         exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
3300         object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
3301         if (exec_list == NULL || object_list == NULL) {
3302                 DRM_ERROR("Failed to allocate exec or object list "
3303                           "for %d buffers\n",
3304                           args->buffer_count);
3305                 ret = -ENOMEM;
3306                 goto pre_mutex_err;
3307         }
3308         ret = copy_from_user(exec_list,
3309                              (struct drm_i915_relocation_entry __user *)
3310                              (uintptr_t) args->buffers_ptr,
3311                              sizeof(*exec_list) * args->buffer_count);
3312         if (ret != 0) {
3313                 DRM_ERROR("copy %d exec entries failed %d\n",
3314                           args->buffer_count, ret);
3315                 goto pre_mutex_err;
3316         }
3317
3318         if (args->num_cliprects != 0) {
3319                 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3320                                     GFP_KERNEL);
3321                 if (cliprects == NULL)
3322                         goto pre_mutex_err;
3323
3324                 ret = copy_from_user(cliprects,
3325                                      (struct drm_clip_rect __user *)
3326                                      (uintptr_t) args->cliprects_ptr,
3327                                      sizeof(*cliprects) * args->num_cliprects);
3328                 if (ret != 0) {
3329                         DRM_ERROR("copy %d cliprects failed: %d\n",
3330                                   args->num_cliprects, ret);
3331                         goto pre_mutex_err;
3332                 }
3333         }
3334
3335         ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3336                                             &relocs);
3337         if (ret != 0)
3338                 goto pre_mutex_err;
3339
3340         mutex_lock(&dev->struct_mutex);
3341
3342         i915_verify_inactive(dev, __FILE__, __LINE__);
3343
3344         if (dev_priv->mm.wedged) {
3345                 DRM_ERROR("Execbuf while wedged\n");
3346                 mutex_unlock(&dev->struct_mutex);
3347                 ret = -EIO;
3348                 goto pre_mutex_err;
3349         }
3350
3351         if (dev_priv->mm.suspended) {
3352                 DRM_ERROR("Execbuf while VT-switched.\n");
3353                 mutex_unlock(&dev->struct_mutex);
3354                 ret = -EBUSY;
3355                 goto pre_mutex_err;
3356         }
3357
3358         /* Look up object handles */
3359         for (i = 0; i < args->buffer_count; i++) {
3360                 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3361                                                        exec_list[i].handle);
3362                 if (object_list[i] == NULL) {
3363                         DRM_ERROR("Invalid object handle %d at index %d\n",
3364                                    exec_list[i].handle, i);
3365                         ret = -EBADF;
3366                         goto err;
3367                 }
3368
3369                 obj_priv = object_list[i]->driver_private;
3370                 if (obj_priv->in_execbuffer) {
3371                         DRM_ERROR("Object %p appears more than once in object list\n",
3372                                    object_list[i]);
3373                         ret = -EBADF;
3374                         goto err;
3375                 }
3376                 obj_priv->in_execbuffer = true;
3377         }
3378
3379         /* Pin and relocate */
3380         for (pin_tries = 0; ; pin_tries++) {
3381                 ret = 0;
3382                 reloc_index = 0;
3383
3384                 for (i = 0; i < args->buffer_count; i++) {
3385                         object_list[i]->pending_read_domains = 0;
3386                         object_list[i]->pending_write_domain = 0;
3387                         ret = i915_gem_object_pin_and_relocate(object_list[i],
3388                                                                file_priv,
3389                                                                &exec_list[i],
3390                                                                &relocs[reloc_index]);
3391                         if (ret)
3392                                 break;
3393                         pinned = i + 1;
3394                         reloc_index += exec_list[i].relocation_count;
3395                 }
3396                 /* success */
3397                 if (ret == 0)
3398                         break;
3399
3400                 /* error other than GTT full, or we've already tried again */
3401                 if (ret != -ENOSPC || pin_tries >= 1) {
3402                         if (ret != -ERESTARTSYS)
3403                                 DRM_ERROR("Failed to pin buffers %d\n", ret);
3404                         goto err;
3405                 }
3406
3407                 /* unpin all of our buffers */
3408                 for (i = 0; i < pinned; i++)
3409                         i915_gem_object_unpin(object_list[i]);
3410                 pinned = 0;
3411
3412                 /* evict everyone we can from the aperture */
3413                 ret = i915_gem_evict_everything(dev);
3414                 if (ret)
3415                         goto err;
3416         }
3417
3418         /* Set the pending read domains for the batch buffer to COMMAND */
3419         batch_obj = object_list[args->buffer_count-1];
3420         if (batch_obj->pending_write_domain) {
3421                 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3422                 ret = -EINVAL;
3423                 goto err;
3424         }
3425         batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3426
3427         /* Sanity check the batch buffer, prior to moving objects */
3428         exec_offset = exec_list[args->buffer_count - 1].offset;
3429         ret = i915_gem_check_execbuffer (args, exec_offset);
3430         if (ret != 0) {
3431                 DRM_ERROR("execbuf with invalid offset/length\n");
3432                 goto err;
3433         }
3434
3435         i915_verify_inactive(dev, __FILE__, __LINE__);
3436
3437         /* Zero the global flush/invalidate flags. These
3438          * will be modified as new domains are computed
3439          * for each object
3440          */
3441         dev->invalidate_domains = 0;
3442         dev->flush_domains = 0;
3443
3444         for (i = 0; i < args->buffer_count; i++) {
3445                 struct drm_gem_object *obj = object_list[i];
3446
3447                 /* Compute new gpu domains and update invalidate/flush */
3448                 i915_gem_object_set_to_gpu_domain(obj);
3449         }
3450
3451         i915_verify_inactive(dev, __FILE__, __LINE__);
3452
3453         if (dev->invalidate_domains | dev->flush_domains) {
3454 #if WATCH_EXEC
3455                 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3456                           __func__,
3457                          dev->invalidate_domains,
3458                          dev->flush_domains);
3459 #endif
3460                 i915_gem_flush(dev,
3461                                dev->invalidate_domains,
3462                                dev->flush_domains);
3463                 if (dev->flush_domains)
3464                         (void)i915_add_request(dev, file_priv,
3465                                                dev->flush_domains);
3466         }
3467
3468         for (i = 0; i < args->buffer_count; i++) {
3469                 struct drm_gem_object *obj = object_list[i];
3470
3471                 obj->write_domain = obj->pending_write_domain;
3472         }
3473
3474         i915_verify_inactive(dev, __FILE__, __LINE__);
3475
3476 #if WATCH_COHERENCY
3477         for (i = 0; i < args->buffer_count; i++) {
3478                 i915_gem_object_check_coherency(object_list[i],
3479                                                 exec_list[i].handle);
3480         }
3481 #endif
3482
3483 #if WATCH_EXEC
3484         i915_gem_dump_object(batch_obj,
3485                               args->batch_len,
3486                               __func__,
3487                               ~0);
3488 #endif
3489
3490         /* Exec the batchbuffer */
3491         ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset);
3492         if (ret) {
3493                 DRM_ERROR("dispatch failed %d\n", ret);
3494                 goto err;
3495         }
3496
3497         /*
3498          * Ensure that the commands in the batch buffer are
3499          * finished before the interrupt fires
3500          */
3501         flush_domains = i915_retire_commands(dev);
3502
3503         i915_verify_inactive(dev, __FILE__, __LINE__);
3504
3505         /*
3506          * Get a seqno representing the execution of the current buffer,
3507          * which we can wait on.  We would like to mitigate these interrupts,
3508          * likely by only creating seqnos occasionally (so that we have
3509          * *some* interrupts representing completion of buffers that we can
3510          * wait on when trying to clear up gtt space).
3511          */
3512         seqno = i915_add_request(dev, file_priv, flush_domains);
3513         BUG_ON(seqno == 0);
3514         for (i = 0; i < args->buffer_count; i++) {
3515                 struct drm_gem_object *obj = object_list[i];
3516
3517                 i915_gem_object_move_to_active(obj, seqno);
3518 #if WATCH_LRU
3519                 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3520 #endif
3521         }
3522 #if WATCH_LRU
3523         i915_dump_lru(dev, __func__);
3524 #endif
3525
3526         i915_verify_inactive(dev, __FILE__, __LINE__);
3527
3528 err:
3529         for (i = 0; i < pinned; i++)
3530                 i915_gem_object_unpin(object_list[i]);
3531
3532         for (i = 0; i < args->buffer_count; i++) {
3533                 if (object_list[i]) {
3534                         obj_priv = object_list[i]->driver_private;
3535                         obj_priv->in_execbuffer = false;
3536                 }
3537                 drm_gem_object_unreference(object_list[i]);
3538         }
3539
3540         mutex_unlock(&dev->struct_mutex);
3541
3542         if (!ret) {
3543                 /* Copy the new buffer offsets back to the user's exec list. */
3544                 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
3545                                    (uintptr_t) args->buffers_ptr,
3546                                    exec_list,
3547                                    sizeof(*exec_list) * args->buffer_count);
3548                 if (ret) {
3549                         ret = -EFAULT;
3550                         DRM_ERROR("failed to copy %d exec entries "
3551                                   "back to user (%d)\n",
3552                                   args->buffer_count, ret);
3553                 }
3554         }
3555
3556         /* Copy the updated relocations out regardless of current error
3557          * state.  Failure to update the relocs would mean that the next
3558          * time userland calls execbuf, it would do so with presumed offset
3559          * state that didn't match the actual object state.
3560          */
3561         ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3562                                            relocs);
3563         if (ret2 != 0) {
3564                 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3565
3566                 if (ret == 0)
3567                         ret = ret2;
3568         }
3569
3570 pre_mutex_err:
3571         drm_free_large(object_list);
3572         drm_free_large(exec_list);
3573         kfree(cliprects);
3574
3575         return ret;
3576 }
3577
3578 int
3579 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3580 {
3581         struct drm_device *dev = obj->dev;
3582         struct drm_i915_gem_object *obj_priv = obj->driver_private;
3583         int ret;
3584
3585         i915_verify_inactive(dev, __FILE__, __LINE__);
3586         if (obj_priv->gtt_space == NULL) {
3587                 ret = i915_gem_object_bind_to_gtt(obj, alignment);
3588                 if (ret != 0) {
3589                         if (ret != -EBUSY && ret != -ERESTARTSYS)
3590                                 DRM_ERROR("Failure to bind: %d\n", ret);
3591                         return ret;
3592                 }
3593         }
3594         /*
3595          * Pre-965 chips need a fence register set up in order to
3596          * properly handle tiled surfaces.
3597          */
3598         if (!IS_I965G(dev) &&
3599             obj_priv->fence_reg == I915_FENCE_REG_NONE &&
3600             obj_priv->tiling_mode != I915_TILING_NONE) {
3601                 ret = i915_gem_object_get_fence_reg(obj);
3602                 if (ret != 0) {
3603                         if (ret != -EBUSY && ret != -ERESTARTSYS)
3604                                 DRM_ERROR("Failure to install fence: %d\n",
3605                                           ret);
3606                         return ret;
3607                 }
3608         }
3609         obj_priv->pin_count++;
3610
3611         /* If the object is not active and not pending a flush,
3612          * remove it from the inactive list
3613          */
3614         if (obj_priv->pin_count == 1) {
3615                 atomic_inc(&dev->pin_count);
3616                 atomic_add(obj->size, &dev->pin_memory);
3617                 if (!obj_priv->active &&
3618                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
3619                     !list_empty(&obj_priv->list))
3620                         list_del_init(&obj_priv->list);
3621         }
3622         i915_verify_inactive(dev, __FILE__, __LINE__);
3623
3624         return 0;
3625 }
3626
3627 void
3628 i915_gem_object_unpin(struct drm_gem_object *obj)
3629 {
3630         struct drm_device *dev = obj->dev;
3631         drm_i915_private_t *dev_priv = dev->dev_private;
3632         struct drm_i915_gem_object *obj_priv = obj->driver_private;
3633
3634         i915_verify_inactive(dev, __FILE__, __LINE__);
3635         obj_priv->pin_count--;
3636         BUG_ON(obj_priv->pin_count < 0);
3637         BUG_ON(obj_priv->gtt_space == NULL);
3638
3639         /* If the object is no longer pinned, and is
3640          * neither active nor being flushed, then stick it on
3641          * the inactive list
3642          */
3643         if (obj_priv->pin_count == 0) {
3644                 if (!obj_priv->active &&
3645                     (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
3646                         list_move_tail(&obj_priv->list,
3647                                        &dev_priv->mm.inactive_list);
3648                 atomic_dec(&dev->pin_count);
3649                 atomic_sub(obj->size, &dev->pin_memory);
3650         }
3651         i915_verify_inactive(dev, __FILE__, __LINE__);
3652 }
3653
3654 int
3655 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3656                    struct drm_file *file_priv)
3657 {
3658         struct drm_i915_gem_pin *args = data;
3659         struct drm_gem_object *obj;
3660         struct drm_i915_gem_object *obj_priv;
3661         int ret;
3662
3663         mutex_lock(&dev->struct_mutex);
3664
3665         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3666         if (obj == NULL) {
3667                 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
3668                           args->handle);
3669                 mutex_unlock(&dev->struct_mutex);
3670                 return -EBADF;
3671         }
3672         obj_priv = obj->driver_private;
3673
3674         if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
3675                 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3676                           args->handle);
3677                 drm_gem_object_unreference(obj);
3678                 mutex_unlock(&dev->struct_mutex);
3679                 return -EINVAL;
3680         }
3681
3682         obj_priv->user_pin_count++;
3683         obj_priv->pin_filp = file_priv;
3684         if (obj_priv->user_pin_count == 1) {
3685                 ret = i915_gem_object_pin(obj, args->alignment);
3686                 if (ret != 0) {
3687                         drm_gem_object_unreference(obj);
3688                         mutex_unlock(&dev->struct_mutex);
3689                         return ret;
3690                 }
3691         }
3692
3693         /* XXX - flush the CPU caches for pinned objects
3694          * as the X server doesn't manage domains yet
3695          */
3696         i915_gem_object_flush_cpu_write_domain(obj);
3697         args->offset = obj_priv->gtt_offset;
3698         drm_gem_object_unreference(obj);
3699         mutex_unlock(&dev->struct_mutex);
3700
3701         return 0;
3702 }
3703
3704 int
3705 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3706                      struct drm_file *file_priv)
3707 {
3708         struct drm_i915_gem_pin *args = data;
3709         struct drm_gem_object *obj;
3710         struct drm_i915_gem_object *obj_priv;
3711
3712         mutex_lock(&dev->struct_mutex);
3713
3714         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3715         if (obj == NULL) {
3716                 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
3717                           args->handle);
3718                 mutex_unlock(&dev->struct_mutex);
3719                 return -EBADF;
3720         }
3721
3722         obj_priv = obj->driver_private;
3723         if (obj_priv->pin_filp != file_priv) {
3724                 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3725                           args->handle);
3726                 drm_gem_object_unreference(obj);
3727                 mutex_unlock(&dev->struct_mutex);
3728                 return -EINVAL;
3729         }
3730         obj_priv->user_pin_count--;
3731         if (obj_priv->user_pin_count == 0) {
3732                 obj_priv->pin_filp = NULL;
3733                 i915_gem_object_unpin(obj);
3734         }
3735
3736         drm_gem_object_unreference(obj);
3737         mutex_unlock(&dev->struct_mutex);
3738         return 0;
3739 }
3740
3741 int
3742 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3743                     struct drm_file *file_priv)
3744 {
3745         struct drm_i915_gem_busy *args = data;
3746         struct drm_gem_object *obj;
3747         struct drm_i915_gem_object *obj_priv;
3748
3749         obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3750         if (obj == NULL) {
3751                 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
3752                           args->handle);
3753                 return -EBADF;
3754         }
3755
3756         mutex_lock(&dev->struct_mutex);
3757         /* Update the active list for the hardware's current position.
3758          * Otherwise this only updates on a delayed timer or when irqs are
3759          * actually unmasked, and our working set ends up being larger than
3760          * required.
3761          */
3762         i915_gem_retire_requests(dev);
3763
3764         obj_priv = obj->driver_private;
3765         /* Don't count being on the flushing list against the object being
3766          * done.  Otherwise, a buffer left on the flushing list but not getting
3767          * flushed (because nobody's flushing that domain) won't ever return
3768          * unbusy and get reused by libdrm's bo cache.  The other expected
3769          * consumer of this interface, OpenGL's occlusion queries, also specs
3770          * that the objects get unbusy "eventually" without any interference.
3771          */
3772         args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;
3773
3774         drm_gem_object_unreference(obj);
3775         mutex_unlock(&dev->struct_mutex);
3776         return 0;
3777 }
3778
/*
 * Throttle ioctl: forwards to i915_gem_ring_throttle() for the calling
 * file and returns its result.  The ioctl payload (@data) is not used.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
3785
3786 int i915_gem_init_object(struct drm_gem_object *obj)
3787 {
3788         struct drm_i915_gem_object *obj_priv;
3789
3790         obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL);
3791         if (obj_priv == NULL)
3792                 return -ENOMEM;
3793
3794         /*
3795          * We've just allocated pages from the kernel,
3796          * so they've just been written by the CPU with
3797          * zeros. They'll need to be clflushed before we
3798          * use them with the GPU.
3799          */
3800         obj->write_domain = I915_GEM_DOMAIN_CPU;
3801         obj->read_domains = I915_GEM_DOMAIN_CPU;
3802
3803         obj_priv->agp_type = AGP_USER_MEMORY;
3804
3805         obj->driver_private = obj_priv;
3806         obj_priv->obj = obj;
3807         obj_priv->fence_reg = I915_FENCE_REG_NONE;
3808         INIT_LIST_HEAD(&obj_priv->list);
3809
3810         return 0;
3811 }
3812
3813 void i915_gem_free_object(struct drm_gem_object *obj)
3814 {
3815         struct drm_device *dev = obj->dev;
3816         struct drm_i915_gem_object *obj_priv = obj->driver_private;
3817
3818         while (obj_priv->pin_count > 0)
3819                 i915_gem_object_unpin(obj);
3820
3821         if (obj_priv->phys_obj)
3822                 i915_gem_detach_phys_object(dev, obj);
3823
3824         i915_gem_object_unbind(obj);
3825
3826         i915_gem_free_mmap_offset(obj);
3827
3828         kfree(obj_priv->page_cpu_valid);
3829         kfree(obj_priv->bit_17);
3830         kfree(obj->driver_private);
3831 }
3832
3833 /** Unbinds all objects that are on the given buffer list. */
3834 static int
3835 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
3836 {
3837         struct drm_gem_object *obj;
3838         struct drm_i915_gem_object *obj_priv;
3839         int ret;
3840
3841         while (!list_empty(head)) {
3842                 obj_priv = list_first_entry(head,
3843                                             struct drm_i915_gem_object,
3844                                             list);
3845                 obj = obj_priv->obj;
3846
3847                 if (obj_priv->pin_count != 0) {
3848                         DRM_ERROR("Pinned object in unbind list\n");
3849                         mutex_unlock(&dev->struct_mutex);
3850                         return -EINVAL;
3851                 }
3852
3853                 ret = i915_gem_object_unbind(obj);
3854                 if (ret != 0) {
3855                         DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
3856                                   ret);
3857                         mutex_unlock(&dev->struct_mutex);
3858                         return ret;
3859                 }
3860         }
3861
3862
3863         return 0;
3864 }
3865
3866 int
3867 i915_gem_idle(struct drm_device *dev)
3868 {
3869         drm_i915_private_t *dev_priv = dev->dev_private;
3870         uint32_t seqno, cur_seqno, last_seqno;
3871         int stuck, ret;
3872
3873         mutex_lock(&dev->struct_mutex);
3874
3875         if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
3876                 mutex_unlock(&dev->struct_mutex);
3877                 return 0;
3878         }
3879
3880         /* Hack!  Don't let anybody do execbuf while we don't control the chip.
3881          * We need to replace this with a semaphore, or something.
3882          */
3883         dev_priv->mm.suspended = 1;
3884
3885         /* Cancel the retire work handler, wait for it to finish if running
3886          */
3887         mutex_unlock(&dev->struct_mutex);
3888         cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3889         mutex_lock(&dev->struct_mutex);
3890
3891         i915_kernel_lost_context(dev);
3892
3893         /* Flush the GPU along with all non-CPU write domains
3894          */
3895         i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
3896         seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
3897
3898         if (seqno == 0) {
3899                 mutex_unlock(&dev->struct_mutex);
3900                 return -ENOMEM;
3901         }
3902
3903         dev_priv->mm.waiting_gem_seqno = seqno;
3904         last_seqno = 0;
3905         stuck = 0;
3906         for (;;) {
3907                 cur_seqno = i915_get_gem_seqno(dev);
3908                 if (i915_seqno_passed(cur_seqno, seqno))
3909                         break;
3910                 if (last_seqno == cur_seqno) {
3911                         if (stuck++ > 100) {
3912                                 DRM_ERROR("hardware wedged\n");
3913                                 dev_priv->mm.wedged = 1;
3914                                 DRM_WAKEUP(&dev_priv->irq_queue);
3915                                 break;
3916                         }
3917                 }
3918                 msleep(10);
3919                 last_seqno = cur_seqno;
3920         }
3921         dev_priv->mm.waiting_gem_seqno = 0;
3922
3923         i915_gem_retire_requests(dev);
3924
3925         spin_lock(&dev_priv->mm.active_list_lock);
3926         if (!dev_priv->mm.wedged) {
3927                 /* Active and flushing should now be empty as we've
3928                  * waited for a sequence higher than any pending execbuffer
3929                  */
3930                 WARN_ON(!list_empty(&dev_priv->mm.active_list));
3931                 WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
3932                 /* Request should now be empty as we've also waited
3933                  * for the last request in the list
3934                  */
3935                 WARN_ON(!list_empty(&dev_priv->mm.request_list));
3936         }
3937
3938         /* Empty the active and flushing lists to inactive.  If there's
3939          * anything left at this point, it means that we're wedged and
3940          * nothing good's going to happen by leaving them there.  So strip
3941          * the GPU domains and just stuff them onto inactive.
3942          */
3943         while (!list_empty(&dev_priv->mm.active_list)) {
3944                 struct drm_i915_gem_object *obj_priv;
3945
3946                 obj_priv = list_first_entry(&dev_priv->mm.active_list,
3947                                             struct drm_i915_gem_object,
3948                                             list);
3949                 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
3950                 i915_gem_object_move_to_inactive(obj_priv->obj);
3951         }
3952         spin_unlock(&dev_priv->mm.active_list_lock);
3953
3954         while (!list_empty(&dev_priv->mm.flushing_list)) {
3955                 struct drm_i915_gem_object *obj_priv;
3956
3957                 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
3958                                             struct drm_i915_gem_object,
3959                                             list);
3960                 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
3961                 i915_gem_object_move_to_inactive(obj_priv->obj);
3962         }
3963
3964
3965         /* Move all inactive buffers out of the GTT. */
3966         ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
3967         WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
3968         if (ret) {
3969                 mutex_unlock(&dev->struct_mutex);
3970                 return ret;
3971         }
3972
3973         i915_gem_cleanup_ringbuffer(dev);
3974         mutex_unlock(&dev->struct_mutex);
3975
3976         return 0;
3977 }
3978
3979 static int
3980 i915_gem_init_hws(struct drm_device *dev)
3981 {
3982         drm_i915_private_t *dev_priv = dev->dev_private;
3983         struct drm_gem_object *obj;
3984         struct drm_i915_gem_object *obj_priv;
3985         int ret;
3986
3987         /* If we need a physical address for the status page, it's already
3988          * initialized at driver load time.
3989          */
3990         if (!I915_NEED_GFX_HWS(dev))
3991                 return 0;
3992
3993         obj = drm_gem_object_alloc(dev, 4096);
3994         if (obj == NULL) {
3995                 DRM_ERROR("Failed to allocate status page\n");
3996                 return -ENOMEM;
3997         }
3998         obj_priv = obj->driver_private;
3999         obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4000
4001         ret = i915_gem_object_pin(obj, 4096);
4002         if (ret != 0) {
4003                 drm_gem_object_unreference(obj);
4004                 return ret;
4005         }
4006
4007         dev_priv->status_gfx_addr = obj_priv->gtt_offset;
4008
4009         dev_priv->hw_status_page = kmap(obj_priv->pages[0]);
4010         if (dev_priv->hw_status_page == NULL) {
4011                 DRM_ERROR("Failed to map status page.\n");
4012                 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
4013                 i915_gem_object_unpin(obj);
4014                 drm_gem_object_unreference(obj);
4015                 return -EINVAL;
4016         }
4017         dev_priv->hws_obj = obj;
4018         memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
4019         I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
4020         I915_READ(HWS_PGA); /* posting read */
4021         DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
4022
4023         return 0;
4024 }
4025
4026 static void
4027 i915_gem_cleanup_hws(struct drm_device *dev)
4028 {
4029         drm_i915_private_t *dev_priv = dev->dev_private;
4030         struct drm_gem_object *obj;
4031         struct drm_i915_gem_object *obj_priv;
4032
4033         if (dev_priv->hws_obj == NULL)
4034                 return;
4035
4036         obj = dev_priv->hws_obj;
4037         obj_priv = obj->driver_private;
4038
4039         kunmap(obj_priv->pages[0]);
4040         i915_gem_object_unpin(obj);
4041         drm_gem_object_unreference(obj);
4042         dev_priv->hws_obj = NULL;
4043
4044         memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
4045         dev_priv->hw_status_page = NULL;
4046
4047         /* Write high address into HWS_PGA when disabling. */
4048         I915_WRITE(HWS_PGA, 0x1ffff000);
4049 }
4050
4051 int
4052 i915_gem_init_ringbuffer(struct drm_device *dev)
4053 {
4054         drm_i915_private_t *dev_priv = dev->dev_private;
4055         struct drm_gem_object *obj;
4056         struct drm_i915_gem_object *obj_priv;
4057         drm_i915_ring_buffer_t *ring = &dev_priv->ring;
4058         int ret;
4059         u32 head;
4060
4061         ret = i915_gem_init_hws(dev);
4062         if (ret != 0)
4063                 return ret;
4064
4065         obj = drm_gem_object_alloc(dev, 128 * 1024);
4066         if (obj == NULL) {
4067                 DRM_ERROR("Failed to allocate ringbuffer\n");
4068                 i915_gem_cleanup_hws(dev);
4069                 return -ENOMEM;
4070         }
4071         obj_priv = obj->driver_private;
4072
4073         ret = i915_gem_object_pin(obj, 4096);
4074         if (ret != 0) {
4075                 drm_gem_object_unreference(obj);
4076                 i915_gem_cleanup_hws(dev);
4077                 return ret;
4078         }
4079
4080         /* Set up the kernel mapping for the ring. */
4081         ring->Size = obj->size;
4082         ring->tail_mask = obj->size - 1;
4083
4084         ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
4085         ring->map.size = obj->size;
4086         ring->map.type = 0;
4087         ring->map.flags = 0;
4088         ring->map.mtrr = 0;
4089
4090         drm_core_ioremap_wc(&ring->map, dev);
4091         if (ring->map.handle == NULL) {
4092                 DRM_ERROR("Failed to map ringbuffer.\n");
4093                 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
4094                 i915_gem_object_unpin(obj);
4095                 drm_gem_object_unreference(obj);
4096                 i915_gem_cleanup_hws(dev);
4097                 return -EINVAL;
4098         }
4099         ring->ring_obj = obj;
4100         ring->virtual_start = ring->map.handle;
4101
4102         /* Stop the ring if it's running. */
4103         I915_WRITE(PRB0_CTL, 0);
4104         I915_WRITE(PRB0_TAIL, 0);
4105         I915_WRITE(PRB0_HEAD, 0);
4106
4107         /* Initialize the ring. */
4108         I915_WRITE(PRB0_START, obj_priv->gtt_offset);
4109         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4110
4111         /* G45 ring initialization fails to reset head to zero */
4112         if (head != 0) {
4113                 DRM_ERROR("Ring head not reset to zero "
4114                           "ctl %08x head %08x tail %08x start %08x\n",
4115                           I915_READ(PRB0_CTL),
4116                           I915_READ(PRB0_HEAD),
4117                           I915_READ(PRB0_TAIL),
4118                           I915_READ(PRB0_START));
4119                 I915_WRITE(PRB0_HEAD, 0);
4120
4121                 DRM_ERROR("Ring head forced to zero "
4122                           "ctl %08x head %08x tail %08x start %08x\n",
4123                           I915_READ(PRB0_CTL),
4124                           I915_READ(PRB0_HEAD),
4125                           I915_READ(PRB0_TAIL),
4126                           I915_READ(PRB0_START));
4127         }
4128
4129         I915_WRITE(PRB0_CTL,
4130                    ((obj->size - 4096) & RING_NR_PAGES) |
4131                    RING_NO_REPORT |
4132                    RING_VALID);
4133
4134         head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4135
4136         /* If the head is still not zero, the ring is dead */
4137         if (head != 0) {
4138                 DRM_ERROR("Ring initialization failed "
4139                           "ctl %08x head %08x tail %08x start %08x\n",
4140                           I915_READ(PRB0_CTL),
4141                           I915_READ(PRB0_HEAD),
4142                           I915_READ(PRB0_TAIL),
4143                           I915_READ(PRB0_START));
4144                 return -EIO;
4145         }
4146
4147         /* Update our cache of the ring state */
4148         if (!drm_core_check_feature(dev, DRIVER_MODESET))
4149                 i915_kernel_lost_context(dev);
4150         else {
4151                 ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4152                 ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
4153                 ring->space = ring->head - (ring->tail + 8);
4154                 if (ring->space < 0)
4155                         ring->space += ring->Size;
4156         }
4157
4158         return 0;
4159 }
4160
4161 void
4162 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4163 {
4164         drm_i915_private_t *dev_priv = dev->dev_private;
4165
4166         if (dev_priv->ring.ring_obj == NULL)
4167                 return;
4168
4169         drm_core_ioremapfree(&dev_priv->ring.map, dev);
4170
4171         i915_gem_object_unpin(dev_priv->ring.ring_obj);
4172         drm_gem_object_unreference(dev_priv->ring.ring_obj);
4173         dev_priv->ring.ring_obj = NULL;
4174         memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
4175
4176         i915_gem_cleanup_hws(dev);
4177 }
4178
4179 int
4180 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4181                        struct drm_file *file_priv)
4182 {
4183         drm_i915_private_t *dev_priv = dev->dev_private;
4184         int ret;
4185
4186         if (drm_core_check_feature(dev, DRIVER_MODESET))
4187                 return 0;
4188
4189         if (dev_priv->mm.wedged) {
4190                 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4191                 dev_priv->mm.wedged = 0;
4192         }
4193
4194         mutex_lock(&dev->struct_mutex);
4195         dev_priv->mm.suspended = 0;
4196
4197         ret = i915_gem_init_ringbuffer(dev);
4198         if (ret != 0) {
4199                 mutex_unlock(&dev->struct_mutex);
4200                 return ret;
4201         }
4202
4203         spin_lock(&dev_priv->mm.active_list_lock);
4204         BUG_ON(!list_empty(&dev_priv->mm.active_list));
4205         spin_unlock(&dev_priv->mm.active_list_lock);
4206
4207         BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4208         BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4209         BUG_ON(!list_empty(&dev_priv->mm.request_list));
4210         mutex_unlock(&dev->struct_mutex);
4211
4212         drm_irq_install(dev);
4213
4214         return 0;
4215 }
4216
4217 int
4218 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4219                        struct drm_file *file_priv)
4220 {
4221         int ret;
4222
4223         if (drm_core_check_feature(dev, DRIVER_MODESET))
4224                 return 0;
4225
4226         ret = i915_gem_idle(dev);
4227         drm_irq_uninstall(dev);
4228
4229         return ret;
4230 }
4231
4232 void
4233 i915_gem_lastclose(struct drm_device *dev)
4234 {
4235         int ret;
4236
4237         if (drm_core_check_feature(dev, DRIVER_MODESET))
4238                 return;
4239
4240         ret = i915_gem_idle(dev);
4241         if (ret)
4242                 DRM_ERROR("failed to idle hardware: %d\n", ret);
4243 }
4244
4245 void
4246 i915_gem_load(struct drm_device *dev)
4247 {
4248         int i;
4249         drm_i915_private_t *dev_priv = dev->dev_private;
4250
4251         spin_lock_init(&dev_priv->mm.active_list_lock);
4252         INIT_LIST_HEAD(&dev_priv->mm.active_list);
4253         INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4254         INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4255         INIT_LIST_HEAD(&dev_priv->mm.request_list);
4256         INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4257                           i915_gem_retire_work_handler);
4258         dev_priv->mm.next_gem_seqno = 1;
4259
4260         /* Old X drivers will take 0-2 for front, back, depth buffers */
4261         dev_priv->fence_reg_start = 3;
4262
4263         if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4264                 dev_priv->num_fence_regs = 16;
4265         else
4266                 dev_priv->num_fence_regs = 8;
4267
4268         /* Initialize fence registers to zero */
4269         if (IS_I965G(dev)) {
4270                 for (i = 0; i < 16; i++)
4271                         I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4272         } else {
4273                 for (i = 0; i < 8; i++)
4274                         I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4275                 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4276                         for (i = 0; i < 8; i++)
4277                                 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4278         }
4279
4280         i915_gem_detect_bit_6_swizzle(dev);
4281 }
4282
4283 /*
4284  * Create a physically contiguous memory object for this object
4285  * e.g. for cursor + overlay regs
4286  */
4287 int i915_gem_init_phys_object(struct drm_device *dev,
4288                               int id, int size)
4289 {
4290         drm_i915_private_t *dev_priv = dev->dev_private;
4291         struct drm_i915_gem_phys_object *phys_obj;
4292         int ret;
4293
4294         if (dev_priv->mm.phys_objs[id - 1] || !size)
4295                 return 0;
4296
4297         phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4298         if (!phys_obj)
4299                 return -ENOMEM;
4300
4301         phys_obj->id = id;
4302
4303         phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
4304         if (!phys_obj->handle) {
4305                 ret = -ENOMEM;
4306                 goto kfree_obj;
4307         }
4308 #ifdef CONFIG_X86
4309         set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4310 #endif
4311
4312         dev_priv->mm.phys_objs[id - 1] = phys_obj;
4313
4314         return 0;
4315 kfree_obj:
4316         kfree(phys_obj);
4317         return ret;
4318 }
4319
4320 void i915_gem_free_phys_object(struct drm_device *dev, int id)
4321 {
4322         drm_i915_private_t *dev_priv = dev->dev_private;
4323         struct drm_i915_gem_phys_object *phys_obj;
4324
4325         if (!dev_priv->mm.phys_objs[id - 1])
4326                 return;
4327
4328         phys_obj = dev_priv->mm.phys_objs[id - 1];
4329         if (phys_obj->cur_obj) {
4330                 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4331         }
4332
4333 #ifdef CONFIG_X86
4334         set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4335 #endif
4336         drm_pci_free(dev, phys_obj->handle);
4337         kfree(phys_obj);
4338         dev_priv->mm.phys_objs[id - 1] = NULL;
4339 }
4340
4341 void i915_gem_free_all_phys_object(struct drm_device *dev)
4342 {
4343         int i;
4344
4345         for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4346                 i915_gem_free_phys_object(dev, i);
4347 }
4348
4349 void i915_gem_detach_phys_object(struct drm_device *dev,
4350                                  struct drm_gem_object *obj)
4351 {
4352         struct drm_i915_gem_object *obj_priv;
4353         int i;
4354         int ret;
4355         int page_count;
4356
4357         obj_priv = obj->driver_private;
4358         if (!obj_priv->phys_obj)
4359                 return;
4360
4361         ret = i915_gem_object_get_pages(obj);
4362         if (ret)
4363                 goto out;
4364
4365         page_count = obj->size / PAGE_SIZE;
4366
4367         for (i = 0; i < page_count; i++) {
4368                 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4369                 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4370
4371                 memcpy(dst, src, PAGE_SIZE);
4372                 kunmap_atomic(dst, KM_USER0);
4373         }
4374         drm_clflush_pages(obj_priv->pages, page_count);
4375         drm_agp_chipset_flush(dev);
4376
4377         i915_gem_object_put_pages(obj);
4378 out:
4379         obj_priv->phys_obj->cur_obj = NULL;
4380         obj_priv->phys_obj = NULL;
4381 }
4382
4383 int
4384 i915_gem_attach_phys_object(struct drm_device *dev,
4385                             struct drm_gem_object *obj, int id)
4386 {
4387         drm_i915_private_t *dev_priv = dev->dev_private;
4388         struct drm_i915_gem_object *obj_priv;
4389         int ret = 0;
4390         int page_count;
4391         int i;
4392
4393         if (id > I915_MAX_PHYS_OBJECT)
4394                 return -EINVAL;
4395
4396         obj_priv = obj->driver_private;
4397
4398         if (obj_priv->phys_obj) {
4399                 if (obj_priv->phys_obj->id == id)
4400                         return 0;
4401                 i915_gem_detach_phys_object(dev, obj);
4402         }
4403
4404
4405         /* create a new object */
4406         if (!dev_priv->mm.phys_objs[id - 1]) {
4407                 ret = i915_gem_init_phys_object(dev, id,
4408                                                 obj->size);
4409                 if (ret) {
4410                         DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4411                         goto out;
4412                 }
4413         }
4414
4415         /* bind to the object */
4416         obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4417         obj_priv->phys_obj->cur_obj = obj;
4418
4419         ret = i915_gem_object_get_pages(obj);
4420         if (ret) {
4421                 DRM_ERROR("failed to get page list\n");
4422                 goto out;
4423         }
4424
4425         page_count = obj->size / PAGE_SIZE;
4426
4427         for (i = 0; i < page_count; i++) {
4428                 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
4429                 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4430
4431                 memcpy(dst, src, PAGE_SIZE);
4432                 kunmap_atomic(src, KM_USER0);
4433         }
4434
4435         i915_gem_object_put_pages(obj);
4436
4437         return 0;
4438 out:
4439         return ret;
4440 }
4441
4442 static int
4443 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4444                      struct drm_i915_gem_pwrite *args,
4445                      struct drm_file *file_priv)
4446 {
4447         struct drm_i915_gem_object *obj_priv = obj->driver_private;
4448         void *obj_addr;
4449         int ret;
4450         char __user *user_data;
4451
4452         user_data = (char __user *) (uintptr_t) args->data_ptr;
4453         obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4454
4455         DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
4456         ret = copy_from_user(obj_addr, user_data, args->size);
4457         if (ret)
4458                 return -EFAULT;
4459
4460         drm_agp_chipset_flush(dev);
4461         return 0;
4462 }
4463
4464 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4465 {
4466         struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4467
4468         /* Clean up our request list when the client is going away, so that
4469          * later retire_requests won't dereference our soon-to-be-gone
4470          * file_priv.
4471          */
4472         mutex_lock(&dev->struct_mutex);
4473         while (!list_empty(&i915_file_priv->mm.request_list))
4474                 list_del_init(i915_file_priv->mm.request_list.next);
4475         mutex_unlock(&dev->struct_mutex);
4476 }