[S390] fix tlb flushing vs. concurrent /proc accesses
Martin Schwidefsky [Tue, 24 Aug 2010 07:26:21 +0000 (09:26 +0200)]
The tlb flushing code uses the mm_users field of the mm_struct to
decide whether each page table entry needs to be flushed individually
with IPTE or whether a single global flush for the mm_struct is
sufficient after all page table updates have been done. The comment
for mm_users says "How many users with user space?", but the /proc
code increases mm_users after it has looked up the task structure by
pid, without creating a new user space process. That makes mm_users
useless for the decision between the two tlb flushing methods. As a
result the current code can be tricked into not flushing tlb entries
by a concurrent access to /proc files, e.g. while a fork is in
progress. The solution is to make the tlb flushing logic independent
of the mm_users field.
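
For illustration, a simplified sketch of the /proc access pattern
that inflates mm_users; get_task_mm() and mmput() are the helpers
involved, the function name proc_style_access is made up for this
sketch:

	/*
	 * Roughly what e.g. a read of /proc/<pid>/maps does:
	 * get_task_mm() takes a reference on the target's mm and
	 * increments mm_users, although no new user space thread
	 * ever attaches to the address space.
	 */
	static void proc_style_access(struct task_struct *task)
	{
		struct mm_struct *mm = get_task_mm(task); /* mm_users++ */

		if (!mm)
			return;
		/* ... walk the page tables, format the output ... */
		mmput(mm);                                /* mm_users-- */
	}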

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

arch/s390/include/asm/hugetlb.h
arch/s390/include/asm/mmu.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/tlb.h
arch/s390/include/asm/tlbflush.h
arch/s390/kernel/smp.c
arch/s390/mm/init.c
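
Condensed, the hunks below implement the following scheme; this is a
sketch using the names introduced by the patch (attach_count,
flush_mm, ptep_invalidate), not literal kernel code, and
example_clear_pte is a made-up name:

	/*
	 * attach_count counts address space attachments done in
	 * switch_mm() plus the cpus running on init_mm; flush_mm
	 * marks that a global flush is still pending.
	 */
	static void example_clear_pte(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
	{
		mm->context.flush_mm = 1;  /* remember: flush needed */
		if (atomic_read(&mm->context.attach_count) > 1 ||
		    mm != current->active_mm)
			/* other attachments: flush with IPTE now */
			ptep_invalidate(mm, addr, ptep);
		else
			/* defer to __tlb_flush_mm_cond() */
			pte_clear(mm, addr, ptep);
	}

__tlb_flush_mm_cond() then performs the deferred global flush and
clears flush_mm again under the page_table_lock, so a concurrent
mm_users increment from /proc can no longer suppress the flush.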

diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 670a1d1..bb8343d 100644
@@ -97,6 +97,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 {
        pte_t pte = huge_ptep_get(ptep);
 
+       mm->context.flush_mm = 1;
        pmd_clear((pmd_t *) ptep);
        return pte;
 }
@@ -167,7 +168,8 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm,
 ({                                                                     \
        pte_t __pte = huge_ptep_get(__ptep);                            \
        if (pte_write(__pte)) {                                         \
-               if (atomic_read(&(__mm)->mm_users) > 1 ||               \
+               (__mm)->context.flush_mm = 1;                           \
+               if (atomic_read(&(__mm)->context.attach_count) > 1 ||   \
                    (__mm) != current->active_mm)                       \
                        huge_ptep_invalidate(__mm, __addr, __ptep);     \
                set_huge_pte_at(__mm, __addr, __ptep,                   \
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 99e3409..78522cd 100644
@@ -2,6 +2,8 @@
 #define __MMU_H
 
 typedef struct {
+       atomic_t attach_count;
+       unsigned int flush_mm;
        spinlock_t list_lock;
        struct list_head crst_list;
        struct list_head pgtable_list;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 976e273..a6f0e7c 100644
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
+#include <asm/tlbflush.h>
 #include <asm-generic/mm_hooks.h>
 
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       atomic_set(&mm->context.attach_count, 0);
+       mm->context.flush_mm = 0;
        mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
 #ifdef CONFIG_64BIT
        mm->context.asce_bits |= _ASCE_TYPE_REGION3;
@@ -76,6 +79,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
        cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
        update_mm(next, tsk);
+       atomic_dec(&prev->context.attach_count);
+       WARN_ON(atomic_read(&prev->context.attach_count) < 0);
+       atomic_inc(&next->context.attach_count);
+       /* Check for TLBs not flushed yet */
+       if (next->context.flush_mm)
+               __tlb_flush_mm(next);
 }
 
 #define enter_lazy_tlb(mm,tsk) do { } while (0)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 89a504c..3157441 100644
@@ -880,7 +880,8 @@ static inline void ptep_invalidate(struct mm_struct *mm,
 #define ptep_get_and_clear(__mm, __address, __ptep)                    \
 ({                                                                     \
        pte_t __pte = *(__ptep);                                        \
-       if (atomic_read(&(__mm)->mm_users) > 1 ||                       \
+       (__mm)->context.flush_mm = 1;                                   \
+       if (atomic_read(&(__mm)->context.attach_count) > 1 ||           \
            (__mm) != current->active_mm)                               \
                ptep_invalidate(__mm, __address, __ptep);               \
        else                                                            \
@@ -923,7 +924,8 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 ({                                                                     \
        pte_t __pte = *(__ptep);                                        \
        if (pte_write(__pte)) {                                         \
-               if (atomic_read(&(__mm)->mm_users) > 1 ||               \
+               (__mm)->context.flush_mm = 1;                           \
+               if (atomic_read(&(__mm)->context.attach_count) > 1 ||   \
                    (__mm) != current->active_mm)                       \
                        ptep_invalidate(__mm, __addr, __ptep);          \
                set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 81150b0..fd1c00d 100644
@@ -50,8 +50,7 @@ static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm,
        struct mmu_gather *tlb = &get_cpu_var(mmu_gathers);
 
        tlb->mm = mm;
-       tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) ||
-               (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm);
+       tlb->fullmm = full_mm_flush;
        tlb->nr_ptes = 0;
        tlb->nr_pxds = TLB_NR_PTRS;
        if (tlb->fullmm)
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 304cffa..29d5d6d 100644
@@ -94,8 +94,12 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 
 static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
 {
-       if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm)
+       spin_lock(&mm->page_table_lock);
+       if (mm->context.flush_mm) {
                __tlb_flush_mm(mm);
+               mm->context.flush_mm = 0;
+       }
+       spin_unlock(&mm->page_table_lock);
 }
 
 /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 541053e..8127ebd 100644
@@ -583,6 +583,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
        sf->gprs[9] = (unsigned long) sf;
        cpu_lowcore->save_area[15] = (unsigned long) sf;
        __ctl_store(cpu_lowcore->cregs_save_area, 0, 15);
+       atomic_inc(&init_mm.context.attach_count);
        asm volatile(
                "       stam    0,15,0(%0)"
                : : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
@@ -659,6 +660,7 @@ void __cpu_die(unsigned int cpu)
        while (sigp_p(0, cpu, sigp_set_prefix) == sigp_busy)
                udelay(10);
        smp_free_lowcore(cpu);
+       atomic_dec(&init_mm.context.attach_count);
        pr_info("Processor %d stopped\n", cpu);
 }
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index acc91c7..30eb6d0 100644
@@ -74,6 +74,8 @@ void __init paging_init(void)
        __ctl_load(S390_lowcore.kernel_asce, 13, 13);
        __raw_local_irq_ssm(ssm_mask);
 
+       atomic_set(&init_mm.context.attach_count, 1);
+
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));