[PATCH] Fix handling spurious page fault for hugetlb region
Hugh Dickins [Thu, 20 Oct 2005 15:24:28 +0000 (16:24 +0100)]
This reverts commit 3359b54c8c07338f3a863d1109b42eebccdcf379 and
replaces it with a cleaner version that is purely based on page table
operations, so that the synchronization between inode size and hugetlb
mappings becomes moot.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

include/linux/hugetlb.h
mm/hugetlb.c
mm/memory.c

index 42cb7d7..d664330 100644 (file)
@@ -25,6 +25,8 @@ int is_hugepage_mem_enough(size_t);
 unsigned long hugetlb_total_pages(void);
 struct page *alloc_huge_page(void);
 void free_huge_page(struct page *);
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                       unsigned long address, int write_access);
 
 extern unsigned long max_huge_pages;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -99,6 +101,7 @@ static inline unsigned long hugetlb_total_pages(void)
                                                do { } while (0)
 #define alloc_huge_page()                      ({ NULL; })
 #define free_huge_page(p)                      ({ (void)(p); BUG(); })
+#define hugetlb_fault(mm, vma, addr, write)    ({ BUG(); 0; })
 
 #ifndef HPAGE_MASK
 #define HPAGE_MASK     0               /* Keep the compiler happy */
@@ -155,24 +158,11 @@ static inline void set_file_hugepages(struct file *file)
 {
        file->f_op = &hugetlbfs_file_operations;
 }
-
-static inline int valid_hugetlb_file_off(struct vm_area_struct *vma, 
-                                         unsigned long address) 
-{
-       struct inode *inode = vma->vm_file->f_dentry->d_inode;
-       loff_t file_off = address - vma->vm_start;
-       
-       file_off += (vma->vm_pgoff << PAGE_SHIFT);
-       
-       return (file_off < inode->i_size);
-}
-
 #else /* !CONFIG_HUGETLBFS */
 
 #define is_file_hugepages(file)                0
 #define set_file_hugepages(file)       BUG()
 #define hugetlb_zero_setup(size)       ERR_PTR(-ENOSYS)
-#define valid_hugetlb_file_off(vma, address)   0
 
 #endif /* !CONFIG_HUGETLBFS */
 
index a1b30d4..61d3806 100644 (file)
@@ -394,6 +394,28 @@ out:
        return ret;
 }
 
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully.  The "normal" case, by contrast, is an access
+ * to a hugetlb page truncated away since mmap, which gets SIGBUS.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                       unsigned long address, int write_access)
+{
+       int ret = VM_FAULT_SIGBUS;      /* result unless a populated pte is found */
+       pte_t *pte;
+
+       spin_lock(&mm->page_table_lock);        /* lookup and test under the lock */
+       pte = huge_pte_offset(mm, address);
+       if (pte && !pte_none(*pte))     /* entry is populated: accept the fault */
+               ret = VM_FAULT_MINOR;
+       spin_unlock(&mm->page_table_lock);
+       return ret;     /* VM_FAULT_MINOR or VM_FAULT_SIGBUS */
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        struct page **pages, struct vm_area_struct **vmas,
                        unsigned long *position, int *length, int i)
index 8c88b97..1db40e9 100644 (file)
@@ -2045,18 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 
        inc_page_state(pgfault);
 
-       if (unlikely(is_vm_hugetlb_page(vma))) {
-               if (valid_hugetlb_file_off(vma, address))
-                       /* We get here only if there was a stale(zero) TLB entry 
-                        * (because of  HW prefetching). 
-                        * Low-level arch code (if needed) should have already
-                        * purged the stale entry as part of this fault handling.  
-                        * Here we just return.
-                        */
-                       return VM_FAULT_MINOR; 
-               else
-                       return VM_FAULT_SIGBUS; /* mapping truncation does this. */
-       }
+       if (unlikely(is_vm_hugetlb_page(vma)))
+               return hugetlb_fault(mm, vma, address, write_access);
 
        /*
         * We need the page table lock to synchronize with kswapd