Merge commit 'v2.6.32-rc7'
Eric W. Biederman [Tue, 17 Nov 2009 09:01:34 +0000 (01:01 -0800)]
Resolve the conflict between v2.6.32-rc7 where dn_def_dev_handler
gets a small bug fix and the sysctl tree where I am removing all
sysctl strategy routines.

1  2 
drivers/md/md.c
init/Kconfig
kernel/sched.c
net/netfilter/nf_conntrack_proto_tcp.c

diff --combined drivers/md/md.c
@@@ -98,6 -98,7 +98,6 @@@ static struct ctl_table_header *raid_ta
  
  static ctl_table raid_table[] = {
        {
 -              .ctl_name       = DEV_RAID_SPEED_LIMIT_MIN,
                .procname       = "speed_limit_min",
                .data           = &sysctl_speed_limit_min,
                .maxlen         = sizeof(int),
                .proc_handler   = &proc_dointvec,
        },
        {
 -              .ctl_name       = DEV_RAID_SPEED_LIMIT_MAX,
                .procname       = "speed_limit_max",
                .data           = &sysctl_speed_limit_max,
                .maxlen         = sizeof(int),
                .mode           = S_IRUGO|S_IWUSR,
                .proc_handler   = &proc_dointvec,
        },
 -      { .ctl_name = 0 }
 +      { }
  };
  
  static ctl_table raid_dir_table[] = {
        {
 -              .ctl_name       = DEV_RAID,
                .procname       = "raid",
                .maxlen         = 0,
                .mode           = S_IRUGO|S_IXUGO,
                .child          = raid_table,
        },
 -      { .ctl_name = 0 }
 +      { }
  };
  
  static ctl_table raid_root_table[] = {
        {
 -              .ctl_name       = CTL_DEV,
                .procname       = "dev",
                .maxlen         = 0,
                .mode           = 0555,
                .child          = raid_dir_table,
        },
 -      { .ctl_name = 0 }
 +      {  }
  };
  
  static const struct block_device_operations md_fops;
@@@ -6500,8 -6504,9 +6500,9 @@@ void md_do_sync(mddev_t *mddev
   skip:
        mddev->curr_resync = 0;
        mddev->curr_resync_completed = 0;
-       mddev->resync_min = 0;
-       mddev->resync_max = MaxSector;
+       if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+               /* We completed so max setting can be forgotten. */
+               mddev->resync_max = MaxSector;
        sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        wake_up(&resync_wait);
        set_bit(MD_RECOVERY_DONE, &mddev->recovery);
diff --combined init/Kconfig
@@@ -754,7 -754,6 +754,7 @@@ config UID1
  
  config SYSCTL_SYSCALL
        bool "Sysctl syscall support" if EMBEDDED
 +      depends on PROC_SYSCTL
        default y
        select SYSCTL
        ---help---
@@@ -938,7 -937,7 +938,7 @@@ config PERF_EVENT
          Enable kernel support for various performance events provided
          by software and hardware.
  
-         Software events are supported either build-in or via the
+         Software events are supported either built-in or via the
          use of generic tracepoints.
  
          Most modern CPUs support performance events via performance
          used to profile the code that runs on that CPU.
  
          The Linux Performance Event subsystem provides an abstraction of
-         these software and hardware cevent apabilities, available via a
+         these software and hardware event capabilities, available via a
          system call and used by the "perf" utility in tools/perf/. It
          provides per task and per CPU counters, and it provides event
          capabilities on top of those.
diff --combined kernel/sched.c
@@@ -309,6 -309,8 +309,8 @@@ static DEFINE_PER_CPU_SHARED_ALIGNED(st
   */
  static DEFINE_SPINLOCK(task_group_lock);
  
+ #ifdef CONFIG_FAIR_GROUP_SCHED
  #ifdef CONFIG_SMP
  static int root_task_group_empty(void)
  {
  }
  #endif
  
- #ifdef CONFIG_FAIR_GROUP_SCHED
  #ifdef CONFIG_USER_SCHED
  # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
  #else /* !CONFIG_USER_SCHED */
@@@ -1992,6 -1993,38 +1993,38 @@@ static inline void check_class_changed(
                p->sched_class->prio_changed(rq, p, oldprio, running);
  }
  
+ /**
+  * kthread_bind - bind a just-created kthread to a cpu.
+  * @p: thread created by kthread_create().
+  * @cpu: cpu (might not be online, must be possible) for @p to run on.
+  *
+  * Description: This function is equivalent to set_cpus_allowed(),
+  * except that @cpu doesn't need to be online, and the thread must be
+  * stopped (i.e., just returned from kthread_create()).
+  *
+  * Function lives here instead of kthread.c because it messes with
+  * scheduler internals which require locking.
+  */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long flags;
+       /* Must have done schedule() in kthread() before we set_task_cpu */
+       if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+               WARN_ON(1);
+               return;
+       }
+       spin_lock_irqsave(&rq->lock, flags);
+       set_task_cpu(p, cpu);
+       p->cpus_allowed = cpumask_of_cpu(cpu);
+       p->rt.nr_cpus_allowed = 1;
+       p->flags |= PF_THREAD_BOUND;
+       spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
  #ifdef CONFIG_SMP
  /*
   * Is this task likely cache-hot:
@@@ -2004,7 -2037,7 +2037,7 @@@ task_hot(struct task_struct *p, u64 now
        /*
         * Buddy candidates are cache hot:
         */
-       if (sched_feat(CACHE_HOT_BUDDY) &&
+       if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
                        (&p->se == cfs_rq_of(&p->se)->next ||
                         &p->se == cfs_rq_of(&p->se)->last))
                return 1;
@@@ -7373,16 -7406,17 +7406,16 @@@ static struct ctl_table sd_ctl_dir[] = 
                .procname       = "sched_domain",
                .mode           = 0555,
        },
 -      {0, },
 +      {}
  };
  
  static struct ctl_table sd_ctl_root[] = {
        {
 -              .ctl_name       = CTL_KERN,
                .procname       = "kernel",
                .mode           = 0555,
                .child          = sd_ctl_dir,
        },
 -      {0, },
 +      {}
  };
  
  static struct ctl_table *sd_alloc_ctl_entry(int n)
@@@ -9531,13 -9565,13 +9564,13 @@@ void __init sched_init(void
        current->sched_class = &fair_sched_class;
  
        /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-       alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
+       zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
  #ifdef CONFIG_SMP
  #ifdef CONFIG_NO_HZ
-       alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+       zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
        alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
  #endif
-       alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+       zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
  #endif /* SMP */
  
        perf_event_init();
diff --combined net/netfilter/nf_conntrack_proto_tcp.c
@@@ -492,6 -492,21 +492,21 @@@ static void tcp_sack(const struct sk_bu
        }
  }
  
+ #ifdef CONFIG_NF_NAT_NEEDED
+ static inline s16 nat_offset(const struct nf_conn *ct,
+                            enum ip_conntrack_dir dir,
+                            u32 seq)
+ {
+       typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
+       return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
+ }
+ #define NAT_OFFSET(pf, ct, dir, seq) \
+       (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
+ #else
+ #define NAT_OFFSET(pf, ct, dir, seq)  0
+ #endif
  static bool tcp_in_window(const struct nf_conn *ct,
                          struct ip_ct_tcp *state,
                          enum ip_conntrack_dir dir,
        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
        __u32 seq, ack, sack, end, win, swin;
+       s16 receiver_offset;
        bool res;
  
        /*
        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
                tcp_sack(skb, dataoff, tcph, &sack);
  
+       /* Take into account NAT sequence number mangling */
+       receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
+       ack -= receiver_offset;
+       sack -= receiver_offset;
        pr_debug("tcp_in_window: START\n");
        pr_debug("tcp_in_window: ");
        nf_ct_dump_tuple(tuple);
-       pr_debug("seq=%u ack=%u sack=%u win=%u end=%u\n",
-                seq, ack, sack, win, end);
+       pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
+                seq, ack, receiver_offset, sack, receiver_offset, win, end);
        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
  
        pr_debug("tcp_in_window: ");
        nf_ct_dump_tuple(tuple);
-       pr_debug("seq=%u ack=%u sack =%u win=%u end=%u\n",
-                seq, ack, sack, win, end);
+       pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
+                seq, ack, receiver_offset, sack, receiver_offset, win, end);
        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
                        before(seq, sender->td_maxend + 1) ?
                        after(end, sender->td_end - receiver->td_maxwin - 1) ?
                        before(sack, receiver->td_end + 1) ?
-                       after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
+                       after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
                        : "ACK is under the lower bound (possible overly delayed ACK)"
                        : "ACK is over the upper bound (ACKed data not seen yet)"
                        : "SEQ is under the lower bound (already ACKed data retransmitted)"
        return res;
  }
  
- #ifdef CONFIG_NF_NAT_NEEDED
- /* Update sender->td_end after NAT successfully mangled the packet */
- /* Caller must linearize skb at tcp header. */
- void nf_conntrack_tcp_update(const struct sk_buff *skb,
-                            unsigned int dataoff,
-                            struct nf_conn *ct, int dir,
-                            s16 offset)
- {
-       const struct tcphdr *tcph = (const void *)skb->data + dataoff;
-       const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir];
-       const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[!dir];
-       __u32 end;
-       end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
-       spin_lock_bh(&ct->lock);
-       /*
-        * We have to worry for the ack in the reply packet only...
-        */
-       if (ct->proto.tcp.seen[dir].td_end + offset == end)
-               ct->proto.tcp.seen[dir].td_end = end;
-       ct->proto.tcp.last_end = end;
-       spin_unlock_bh(&ct->lock);
-       pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
-                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-                sender->td_end, sender->td_maxend, sender->td_maxwin,
-                sender->td_scale,
-                receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-                receiver->td_scale);
- }
- EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
- #endif
  #define       TH_FIN  0x01
  #define       TH_SYN  0x02
  #define       TH_RST  0x04
@@@ -1303,6 -1291,7 +1291,6 @@@ static struct ctl_table tcp_sysctl_tabl
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
 -              .ctl_name       = NET_NF_CONNTRACK_TCP_LOOSE,
                .procname       = "nf_conntrack_tcp_loose",
                .data           = &nf_ct_tcp_loose,
                .maxlen         = sizeof(unsigned int),
                .proc_handler   = proc_dointvec,
        },
        {
 -              .ctl_name       = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
                .procname       = "nf_conntrack_tcp_be_liberal",
                .data           = &nf_ct_tcp_be_liberal,
                .maxlen         = sizeof(unsigned int),
                .proc_handler   = proc_dointvec,
        },
        {
 -              .ctl_name       = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
                .procname       = "nf_conntrack_tcp_max_retrans",
                .data           = &nf_ct_tcp_max_retrans,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
 -      {
 -              .ctl_name       = 0
 -      }
 +      { }
  };
  
  #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
@@@ -1399,6 -1392,7 +1387,6 @@@ static struct ctl_table tcp_compat_sysc
                .proc_handler   = proc_dointvec_jiffies,
        },
        {
 -              .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
                .procname       = "ip_conntrack_tcp_loose",
                .data           = &nf_ct_tcp_loose,
                .maxlen         = sizeof(unsigned int),
                .proc_handler   = proc_dointvec,
        },
        {
 -              .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
                .procname       = "ip_conntrack_tcp_be_liberal",
                .data           = &nf_ct_tcp_be_liberal,
                .maxlen         = sizeof(unsigned int),
                .proc_handler   = proc_dointvec,
        },
        {
 -              .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
                .procname       = "ip_conntrack_tcp_max_retrans",
                .data           = &nf_ct_tcp_max_retrans,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
 -      {
 -              .ctl_name       = 0
 -      }
 +      { }
  };
  #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
  #endif /* CONFIG_SYSCTL */