Merge branch 'master' of /repos/git/net-next-2.6
Patrick McHardy [Wed, 19 Jan 2011 22:51:37 +0000 (23:51 +0100)]
135 files changed:
include/linux/audit.h
include/linux/ip_vs.h
include/linux/netfilter.h
include/linux/netfilter/Kbuild
include/linux/netfilter/nf_conntrack_snmp.h [new file with mode: 0644]
include/linux/netfilter/nfnetlink_conntrack.h
include/linux/netfilter/x_tables.h
include/linux/netfilter/xt_AUDIT.h [new file with mode: 0644]
include/linux/netfilter/xt_CT.h
include/linux/netfilter/xt_NFQUEUE.h
include/linux/netfilter/xt_TCPOPTSTRIP.h
include/linux/netfilter/xt_TPROXY.h
include/linux/netfilter/xt_cluster.h
include/linux/netfilter/xt_comment.h
include/linux/netfilter/xt_conntrack.h
include/linux/netfilter/xt_quota.h
include/linux/netfilter/xt_time.h
include/linux/netfilter/xt_u32.h
include/linux/netfilter_bridge/ebt_802_3.h
include/linux/netfilter_bridge/ebt_among.h
include/linux/netfilter_bridge/ebt_arp.h
include/linux/netfilter_bridge/ebt_ip.h
include/linux/netfilter_bridge/ebt_ip6.h
include/linux/netfilter_bridge/ebt_limit.h
include/linux/netfilter_bridge/ebt_log.h
include/linux/netfilter_bridge/ebt_mark_m.h
include/linux/netfilter_bridge/ebt_nflog.h
include/linux/netfilter_bridge/ebt_pkttype.h
include/linux/netfilter_bridge/ebt_stp.h
include/linux/netfilter_bridge/ebt_ulog.h
include/linux/netfilter_bridge/ebt_vlan.h
include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
include/linux/netfilter_ipv4/ipt_ECN.h
include/linux/netfilter_ipv4/ipt_SAME.h
include/linux/netfilter_ipv4/ipt_TTL.h
include/linux/netfilter_ipv4/ipt_addrtype.h
include/linux/netfilter_ipv4/ipt_ah.h
include/linux/netfilter_ipv4/ipt_ecn.h
include/linux/netfilter_ipv4/ipt_ttl.h
include/linux/netfilter_ipv6/ip6t_HL.h
include/linux/netfilter_ipv6/ip6t_REJECT.h
include/linux/netfilter_ipv6/ip6t_ah.h
include/linux/netfilter_ipv6/ip6t_frag.h
include/linux/netfilter_ipv6/ip6t_hl.h
include/linux/netfilter_ipv6/ip6t_ipv6header.h
include/linux/netfilter_ipv6/ip6t_mh.h
include/linux/netfilter_ipv6/ip6t_opts.h
include/linux/netfilter_ipv6/ip6t_rt.h
include/net/dst.h
include/net/ip_fib.h
include/net/ip_vs.h
include/net/net_namespace.h
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_ecache.h
include/net/netfilter/nf_conntrack_extend.h
include/net/netfilter/nf_conntrack_helper.h
include/net/netfilter/nf_conntrack_l3proto.h
include/net/netfilter/nf_conntrack_timestamp.h [new file with mode: 0644]
include/net/netfilter/nf_nat.h
include/net/netfilter/nf_nat_core.h
include/net/netns/conntrack.h
include/net/netns/ip_vs.h [new file with mode: 0644]
include/net/netns/ipv4.h
kernel/audit.c
net/bridge/netfilter/ebt_ip6.c
net/bridge/netfilter/ebtables.c
net/ipv4/Kconfig
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/ip_input.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_LOG.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
net/ipv4/netfilter/nf_nat_amanda.c
net/ipv4/netfilter/nf_nat_core.c
net/ipv4/netfilter/nf_nat_snmp_basic.c
net/ipv4/route.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_LOG.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/core.c
net/netfilter/ipvs/ip_vs_app.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_est.c
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_nfct.c
net/netfilter/ipvs/ip_vs_pe.c
net/netfilter/ipvs/ip_vs_pe_sip.c
net/netfilter/ipvs/ip_vs_proto.c
net/netfilter/ipvs/ip_vs_proto_ah_esp.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_proto_udp.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_broadcast.c [new file with mode: 0644]
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_extend.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netbios_ns.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_snmp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_conntrack_timestamp.c [new file with mode: 0644]
net/netfilter/nf_log.c
net/netfilter/nf_queue.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/x_tables.c
net/netfilter/xt_AUDIT.c [new file with mode: 0644]
net/netfilter/xt_CLASSIFY.c
net/netfilter/xt_IDLETIMER.c
net/netfilter/xt_LED.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_connlimit.c
net/netfilter/xt_conntrack.c
net/netfilter/xt_cpu.c
net/netfilter/xt_ipvs.c
net/sched/Kconfig
net/sched/cls_flow.c
net/sched/em_meta.c

index 359df04..9d339eb 100644 (file)
 #define AUDIT_BPRM_FCAPS       1321    /* Information about fcaps increasing perms */
 #define AUDIT_CAPSET           1322    /* Record showing argument to sys_capset */
 #define AUDIT_MMAP             1323    /* Record showing descriptor and flags in mmap */
+#define AUDIT_NETFILTER_PKT    1324    /* Packets traversing netfilter chains */
+#define AUDIT_NETFILTER_CFG    1325    /* Netfilter chain modifications */
 
 #define AUDIT_AVC              1400    /* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR      1401    /* Internal SE Linux Errors */
index 5f43a3b..4deb383 100644 (file)
 #define IP_VS_CONN_F_TEMPLATE  0x1000          /* template, not connection */
 #define IP_VS_CONN_F_ONE_PACKET        0x2000          /* forward only one packet */
 
+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
+                                 IP_VS_CONN_F_NOOUTPUT | \
+                                 IP_VS_CONN_F_INACTIVE | \
+                                 IP_VS_CONN_F_SEQ_MASK | \
+                                 IP_VS_CONN_F_NO_CPORT | \
+                                 IP_VS_CONN_F_TEMPLATE \
+                                )
+
 /* Flags that are not sent to backup server start from bit 16 */
 #define IP_VS_CONN_F_NFCT      (1 << 16)       /* use netfilter conntrack */
 
index 1893837..eeec00a 100644 (file)
 #define NF_MAX_VERDICT NF_STOP
 
 /* we overload the higher bits for encoding auxiliary data such as the queue
- * number. Not nice, but better than additional function arguments. */
-#define NF_VERDICT_MASK 0x0000ffff
-#define NF_VERDICT_BITS 16
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS   0x00008000
 
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
 #define NF_VERDICT_QMASK 0xffff0000
 #define NF_VERDICT_QBITS 16
 
-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
 
-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
 
 /* only for userspace compatibility */
 #ifndef __KERNEL__
@@ -41,6 +45,9 @@
    <= 0x2000 is used for protocol-flags. */
 #define NFC_UNKNOWN 0x4000
 #define NFC_ALTERED 0x8000
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
 #endif
 
 enum nf_inet_hooks {
@@ -72,6 +79,10 @@ union nf_inet_addr {
 
 #ifdef __KERNEL__
 #ifdef CONFIG_NETFILTER
+static inline int NF_DROP_GETERR(int verdict)
+{
+       return -(verdict >> NF_VERDICT_QBITS);
+}
 
 static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
                                   const union nf_inet_addr *a2)
@@ -267,7 +278,7 @@ struct nf_afinfo {
        int             route_key_size;
 };
 
-extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
+extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
 static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
 {
        return rcu_dereference(nf_afinfo[family]);
@@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 #endif /*CONFIG_NETFILTER*/
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
 extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
-extern void (*nf_ct_destroy)(struct nf_conntrack *);
+extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 #endif
index 9d40eff..fc4e0aa 100644 (file)
@@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h
 header-y += nfnetlink_log.h
 header-y += nfnetlink_queue.h
 header-y += x_tables.h
+header-y += xt_AUDIT.h
 header-y += xt_CHECKSUM.h
 header-y += xt_CLASSIFY.h
 header-y += xt_CONNMARK.h
@@ -55,6 +56,8 @@ header-y += xt_rateest.h
 header-y += xt_realm.h
 header-y += xt_recent.h
 header-y += xt_sctp.h
+header-y += xt_secmark.h
+header-y += xt_socket.h
 header-y += xt_state.h
 header-y += xt_statistic.h
 header-y += xt_string.h
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h
new file mode 100644 (file)
index 0000000..064bc63
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _NF_CONNTRACK_SNMP_H
+#define _NF_CONNTRACK_SNMP_H
+
+extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+                               unsigned int protoff,
+                               struct nf_conn *ct,
+                               enum ip_conntrack_info ctinfo);
+
+#endif /* _NF_CONNTRACK_SNMP_H */
index 19711e3..debf1ae 100644 (file)
@@ -42,6 +42,7 @@ enum ctattr_type {
        CTA_SECMARK,            /* obsolete */
        CTA_ZONE,
        CTA_SECCTX,
+       CTA_TIMESTAMP,
        __CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -127,6 +128,14 @@ enum ctattr_counters {
 };
 #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
 
+enum ctattr_tstamp {
+       CTA_TIMESTAMP_UNSPEC,
+       CTA_TIMESTAMP_START,
+       CTA_TIMESTAMP_STOP,
+       __CTA_TIMESTAMP_MAX
+};
+#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
+
 enum ctattr_nat {
        CTA_NAT_UNSPEC,
        CTA_NAT_MINIP,
index 6712e71..3721952 100644 (file)
@@ -611,8 +611,9 @@ struct _compat_xt_align {
 extern void xt_compat_lock(u_int8_t af);
 extern void xt_compat_unlock(u_int8_t af);
 
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
+extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
 extern void xt_compat_flush_offsets(u_int8_t af);
+extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
 extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
 
 extern int xt_compat_match_offset(const struct xt_match *match);
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h
new file mode 100644 (file)
index 0000000..38751d2
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Header file for iptables xt_AUDIT target
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _XT_AUDIT_TARGET_H
+#define _XT_AUDIT_TARGET_H
+
+#include <linux/types.h>
+
+enum {
+       XT_AUDIT_TYPE_ACCEPT = 0,
+       XT_AUDIT_TYPE_DROP,
+       XT_AUDIT_TYPE_REJECT,
+       __XT_AUDIT_TYPE_MAX,
+};
+
+#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)
+
+struct xt_audit_info {
+       __u8 type; /* XT_AUDIT_TYPE_* */
+};
+
+#endif /* _XT_AUDIT_TARGET_H */
index 1b56410..fbf4c56 100644 (file)
@@ -4,11 +4,11 @@
 #define XT_CT_NOTRACK  0x1
 
 struct xt_ct_target_info {
-       u_int16_t       flags;
-       u_int16_t       zone;
-       u_int32_t       ct_events;
-       u_int32_t       exp_events;
-       char            helper[16];
+       __u16 flags;
+       __u16 zone;
+       __u32 ct_events;
+       __u32 exp_events;
+       char helper[16];
 
        /* Used internally by the kernel */
        struct nf_conn  *ct __attribute__((aligned(8)));
index 2584f4a..9eafdbb 100644 (file)
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 {
        __u16 queues_total;
 };
 
+struct xt_NFQ_info_v2 {
+       __u16 queuenum;
+       __u16 queues_total;
+       __u16 bypass;
+};
+
 #endif /* _XT_NFQ_TARGET_H */
index 2db5432..342ef14 100644 (file)
@@ -7,7 +7,7 @@
        (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
 
 struct xt_tcpoptstrip_target_info {
-       u_int32_t strip_bmap[8];
+       __u32 strip_bmap[8];
 };
 
 #endif /* _XT_TCPOPTSTRIP_H */
index 3f3d693..8097e0b 100644 (file)
@@ -5,15 +5,15 @@
  * redirection. We can get rid of that whenever we get support for
  * mutliple targets in the same rule. */
 struct xt_tproxy_target_info {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
        __be32 laddr;
        __be16 lport;
 };
 
 struct xt_tproxy_target_info_v1 {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
        union nf_inet_addr laddr;
        __be16 lport;
 };
index 8866826..66cfa3c 100644 (file)
@@ -6,10 +6,10 @@ enum xt_cluster_flags {
 };
 
 struct xt_cluster_match_info {
-       u_int32_t               total_nodes;
-       u_int32_t               node_mask;
-       u_int32_t               hash_seed;
-       u_int32_t               flags;
+       __u32 total_nodes;
+       __u32 node_mask;
+       __u32 hash_seed;
+       __u32 flags;
 };
 
 #define XT_CLUSTER_NODES_MAX   32
index eacfedc..0ea5e79 100644 (file)
@@ -4,7 +4,7 @@
 #define XT_MAX_COMMENT_LEN 256
 
 struct xt_comment_info {
-       unsigned char comment[XT_MAX_COMMENT_LEN];
+       char comment[XT_MAX_COMMENT_LEN];
 };
 
 #endif /* XT_COMMENT_H */
index 54f47a2..74b904d 100644 (file)
@@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 {
        __u16 state_mask, status_mask;
 };
 
+struct xt_conntrack_mtinfo3 {
+       union nf_inet_addr origsrc_addr, origsrc_mask;
+       union nf_inet_addr origdst_addr, origdst_mask;
+       union nf_inet_addr replsrc_addr, replsrc_mask;
+       union nf_inet_addr repldst_addr, repldst_mask;
+       __u32 expires_min, expires_max;
+       __u16 l4proto;
+       __u16 origsrc_port, origdst_port;
+       __u16 replsrc_port, repldst_port;
+       __u16 match_flags, invert_flags;
+       __u16 state_mask, status_mask;
+       __u16 origsrc_port_high, origdst_port_high;
+       __u16 replsrc_port_high, repldst_port_high;
+};
+
 #endif /*_XT_CONNTRACK_H*/
index b0d28c6..8bda65f 100644 (file)
@@ -9,9 +9,9 @@ enum xt_quota_flags {
 struct xt_quota_priv;
 
 struct xt_quota_info {
-       u_int32_t               flags;
-       u_int32_t               pad;
-       aligned_u64             quota;
+       __u32 flags;
+       __u32 pad;
+       aligned_u64 quota;
 
        /* Used internally by the kernel */
        struct xt_quota_priv    *master;
index 14b6df4..b8bd456 100644 (file)
@@ -2,13 +2,13 @@
 #define _XT_TIME_H 1
 
 struct xt_time_info {
-       u_int32_t date_start;
-       u_int32_t date_stop;
-       u_int32_t daytime_start;
-       u_int32_t daytime_stop;
-       u_int32_t monthdays_match;
-       u_int8_t weekdays_match;
-       u_int8_t flags;
+       __u32 date_start;
+       __u32 date_stop;
+       __u32 daytime_start;
+       __u32 daytime_stop;
+       __u32 monthdays_match;
+       __u8 weekdays_match;
+       __u8 flags;
 };
 
 enum {
index 9947f56..e8c3d87 100644 (file)
@@ -9,13 +9,13 @@ enum xt_u32_ops {
 };
 
 struct xt_u32_location_element {
-       u_int32_t number;
-       u_int8_t nextop;
+       __u32 number;
+       __u8 nextop;
 };
 
 struct xt_u32_value_element {
-       u_int32_t min;
-       u_int32_t max;
+       __u32 min;
+       __u32 max;
 };
 
 /*
@@ -27,14 +27,14 @@ struct xt_u32_value_element {
 struct xt_u32_test {
        struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
        struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
-       u_int8_t nnums;
-       u_int8_t nvalues;
+       __u8 nnums;
+       __u8 nvalues;
 };
 
 struct xt_u32 {
        struct xt_u32_test tests[XT_U32_MAXSIZE+1];
-       u_int8_t ntests;
-       u_int8_t invert;
+       __u8 ntests;
+       __u8 invert;
 };
 
 #endif /* _XT_U32_H */
index c73ef0b..c427764 100644 (file)
 
 /* ui has one byte ctrl, ni has two */
 struct hdr_ui {
-       uint8_t dsap;
-       uint8_t ssap;
-       uint8_t ctrl;
-       uint8_t orig[3];
+       __u8 dsap;
+       __u8 ssap;
+       __u8 ctrl;
+       __u8 orig[3];
        __be16 type;
 };
 
 struct hdr_ni {
-       uint8_t dsap;
-       uint8_t ssap;
+       __u8 dsap;
+       __u8 ssap;
        __be16 ctrl;
-       uint8_t  orig[3];
+       __u8  orig[3];
        __be16 type;
 };
 
 struct ebt_802_3_hdr {
-       uint8_t  daddr[6];
-       uint8_t  saddr[6];
+       __u8  daddr[6];
+       __u8  saddr[6];
        __be16 len;
        union {
                struct hdr_ui ui;
@@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
 #endif
 
 struct ebt_802_3_info {
-       uint8_t  sap;
+       __u8  sap;
        __be16 type;
-       uint8_t  bitmask;
-       uint8_t  invflags;
+       __u8  bitmask;
+       __u8  invflags;
 };
 
 #endif
index 0009558..686c961 100644 (file)
@@ -30,7 +30,7 @@
  */
 
 struct ebt_mac_wormhash_tuple {
-       uint32_t cmp[2];
+       __u32 cmp[2];
        __be32 ip;
 };
 
index cbf4843..e62b5af 100644 (file)
@@ -27,8 +27,8 @@ struct ebt_arp_info
        unsigned char smmsk[ETH_ALEN];
        unsigned char dmaddr[ETH_ALEN];
        unsigned char dmmsk[ETH_ALEN];
-       uint8_t  bitmask;
-       uint8_t  invflags;
+       __u8  bitmask;
+       __u8  invflags;
 };
 
 #endif
index 6a708fb..d99de58 100644 (file)
@@ -31,12 +31,12 @@ struct ebt_ip_info {
        __be32 daddr;
        __be32 smsk;
        __be32 dmsk;
-       uint8_t  tos;
-       uint8_t  protocol;
-       uint8_t  bitmask;
-       uint8_t  invflags;
-       uint16_t sport[2];
-       uint16_t dport[2];
+       __u8  tos;
+       __u8  protocol;
+       __u8  bitmask;
+       __u8  invflags;
+       __u16 sport[2];
+       __u16 dport[2];
 };
 
 #endif
index e5de987..998e9d5 100644 (file)
 #define EBT_IP6_PROTO 0x08
 #define EBT_IP6_SPORT 0x10
 #define EBT_IP6_DPORT 0x20
+#define EBT_IP6_ICMP6 0x40
+
 #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
-                     EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT)
+                     EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \
+                     EBT_IP6_ICMP6)
 #define EBT_IP6_MATCH "ip6"
 
 /* the same values are used for the invflags */
@@ -28,12 +31,18 @@ struct ebt_ip6_info {
        struct in6_addr daddr;
        struct in6_addr smsk;
        struct in6_addr dmsk;
-       uint8_t  tclass;
-       uint8_t  protocol;
-       uint8_t  bitmask;
-       uint8_t  invflags;
-       uint16_t sport[2];
-       uint16_t dport[2];
+       __u8  tclass;
+       __u8  protocol;
+       __u8  bitmask;
+       __u8  invflags;
+       union {
+               __u16 sport[2];
+               __u8 icmpv6_type[2];
+       };
+       union {
+               __u16 dport[2];
+               __u8 icmpv6_code[2];
+       };
 };
 
 #endif
index 4bf76b7..721d51f 100644 (file)
    seconds, or one every 59 hours. */
 
 struct ebt_limit_info {
-       u_int32_t avg;    /* Average secs between packets * scale */
-       u_int32_t burst;  /* Period multiplier for upper limit. */
+       __u32 avg;    /* Average secs between packets * scale */
+       __u32 burst;  /* Period multiplier for upper limit. */
 
        /* Used internally by the kernel */
        unsigned long prev;
-       u_int32_t credit;
-       u_int32_t credit_cap, cost;
+       __u32 credit;
+       __u32 credit_cap, cost;
 };
 
 #endif
index cc2cdfb..564beb4 100644 (file)
@@ -10,9 +10,9 @@
 #define EBT_LOG_WATCHER "log"
 
 struct ebt_log_info {
-       uint8_t loglevel;
-       uint8_t prefix[EBT_LOG_PREFIX_SIZE];
-       uint32_t bitmask;
+       __u8 loglevel;
+       __u8 prefix[EBT_LOG_PREFIX_SIZE];
+       __u32 bitmask;
 };
 
 #endif
index 9ceb10e..97b96c4 100644 (file)
@@ -6,8 +6,8 @@
 #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
 struct ebt_mark_m_info {
        unsigned long mark, mask;
-       uint8_t invert;
-       uint8_t bitmask;
+       __u8 invert;
+       __u8 bitmask;
 };
 #define EBT_MARK_MATCH "mark_m"
 
index 0528178..477315b 100644 (file)
 #define EBT_NFLOG_DEFAULT_THRESHOLD    1
 
 struct ebt_nflog_info {
-       u_int32_t len;
-       u_int16_t group;
-       u_int16_t threshold;
-       u_int16_t flags;
-       u_int16_t pad;
+       __u32 len;
+       __u16 group;
+       __u16 threshold;
+       __u16 flags;
+       __u16 pad;
        char prefix[EBT_NFLOG_PREFIX_SIZE];
 };
 
index 51a7998..7c0fb0f 100644 (file)
@@ -2,8 +2,8 @@
 #define __LINUX_BRIDGE_EBT_PKTTYPE_H
 
 struct ebt_pkttype_info {
-       uint8_t pkt_type;
-       uint8_t invert;
+       __u8 pkt_type;
+       __u8 invert;
 };
 #define EBT_PKTTYPE_MATCH "pkttype"
 
index e503a0a..13a0bd4 100644 (file)
 #define EBT_STP_MATCH "stp"
 
 struct ebt_stp_config_info {
-       uint8_t flags;
-       uint16_t root_priol, root_priou;
+       __u8 flags;
+       __u16 root_priol, root_priou;
        char root_addr[6], root_addrmsk[6];
-       uint32_t root_costl, root_costu;
-       uint16_t sender_priol, sender_priou;
+       __u32 root_costl, root_costu;
+       __u16 sender_priol, sender_priou;
        char sender_addr[6], sender_addrmsk[6];
-       uint16_t portl, portu;
-       uint16_t msg_agel, msg_ageu;
-       uint16_t max_agel, max_ageu;
-       uint16_t hello_timel, hello_timeu;
-       uint16_t forward_delayl, forward_delayu;
+       __u16 portl, portu;
+       __u16 msg_agel, msg_ageu;
+       __u16 max_agel, max_ageu;
+       __u16 hello_timel, hello_timeu;
+       __u16 forward_delayl, forward_delayu;
 };
 
 struct ebt_stp_info {
-       uint8_t type;
+       __u8 type;
        struct ebt_stp_config_info config;
-       uint16_t bitmask;
-       uint16_t invflags;
+       __u16 bitmask;
+       __u16 invflags;
 };
 
 #endif
index b677e26..de35a51 100644 (file)
@@ -10,7 +10,7 @@
 #define EBT_ULOG_VERSION 1
 
 struct ebt_ulog_info {
-       uint32_t nlgroup;
+       __u32 nlgroup;
        unsigned int cprange;
        unsigned int qthreshold;
        char prefix[EBT_ULOG_PREFIX_LEN];
index 1d98be4..48dffc1 100644 (file)
@@ -8,12 +8,12 @@
 #define EBT_VLAN_MATCH "vlan"
 
 struct ebt_vlan_info {
-       uint16_t id;            /* VLAN ID {1-4095} */
-       uint8_t prio;           /* VLAN User Priority {0-7} */
+       __u16 id;               /* VLAN ID {1-4095} */
+       __u8 prio;              /* VLAN User Priority {0-7} */
        __be16 encap;           /* VLAN Encapsulated frame code {0-65535} */
-       uint8_t bitmask;                /* Args bitmask bit 1=1 - ID arg,
+       __u8 bitmask;           /* Args bitmask bit 1=1 - ID arg,
                                   bit 2=1 User-Priority arg, bit 3=1 encap*/
-       uint8_t invflags;               /* Inverse bitmask  bit 1=1 - inversed ID arg, 
+       __u8 invflags;          /* Inverse bitmask  bit 1=1 - inversed ID arg, 
                                   bit 2=1 - inversed Pirority arg */
 };
 
index e5a3687..3114f06 100644 (file)
@@ -17,15 +17,15 @@ struct clusterip_config;
 
 struct ipt_clusterip_tgt_info {
 
-       u_int32_t flags;
+       __u32 flags;
 
        /* only relevant for new ones */
-       u_int8_t clustermac[6];
-       u_int16_t num_total_nodes;
-       u_int16_t num_local_nodes;
-       u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
-       u_int32_t hash_mode;
-       u_int32_t hash_initval;
+       __u8 clustermac[6];
+       __u16 num_total_nodes;
+       __u16 num_local_nodes;
+       __u16 local_nodes[CLUSTERIP_MAX_NODES];
+       __u32 hash_mode;
+       __u32 hash_initval;
 
        /* Used internally by the kernel */
        struct clusterip_config *config;
index 7ca4591..c6e3e01 100644 (file)
 #define IPT_ECN_OP_MASK                0xce
 
 struct ipt_ECN_info {
-       u_int8_t operation;     /* bitset of operations */
-       u_int8_t ip_ect;        /* ECT codepoint of IPv4 header, pre-shifted */
+       __u8 operation; /* bitset of operations */
+       __u8 ip_ect;    /* ECT codepoint of IPv4 header, pre-shifted */
        union {
                struct {
-                       u_int8_t ece:1, cwr:1; /* TCP ECT bits */
+                       __u8 ece:1, cwr:1; /* TCP ECT bits */
                } tcp;
        } proto;
 };
index 2529660..fa0ebec 100644 (file)
@@ -7,9 +7,9 @@
 
 struct ipt_same_info {
        unsigned char info;
-       u_int32_t rangesize;
-       u_int32_t ipnum;
-       u_int32_t *iparray;
+       __u32 rangesize;
+       __u32 ipnum;
+       __u32 *iparray;
 
        /* hangs off end. */
        struct nf_nat_range range[IPT_SAME_MAX_RANGE];
index ee6611e..f6250e4 100644 (file)
@@ -13,8 +13,8 @@ enum {
 #define IPT_TTL_MAXMODE        IPT_TTL_DEC
 
 struct ipt_TTL_info {
-       u_int8_t        mode;
-       u_int8_t        ttl;
+       __u8    mode;
+       __u8    ttl;
 };
 
 
index 446de6a..f29c3cf 100644 (file)
@@ -9,17 +9,17 @@ enum {
 };
 
 struct ipt_addrtype_info_v1 {
-       u_int16_t       source;         /* source-type mask */
-       u_int16_t       dest;           /* dest-type mask */
-       u_int32_t       flags;
+       __u16   source;         /* source-type mask */
+       __u16   dest;           /* dest-type mask */
+       __u32   flags;
 };
 
 /* revision 0 */
 struct ipt_addrtype_info {
-       u_int16_t       source;         /* source-type mask */
-       u_int16_t       dest;           /* dest-type mask */
-       u_int32_t       invert_source;
-       u_int32_t       invert_dest;
+       __u16   source;         /* source-type mask */
+       __u16   dest;           /* dest-type mask */
+       __u32   invert_source;
+       __u32   invert_dest;
 };
 
 #endif
index 2e555b4..8fea283 100644 (file)
@@ -2,8 +2,8 @@
 #define _IPT_AH_H
 
 struct ipt_ah {
-       u_int32_t spis[2];                      /* Security Parameter Index */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 spis[2];                  /* Security Parameter Index */
+       __u8  invflags;                 /* Inverse flags */
 };
 
 
index 9945baa..78b98aa 100644 (file)
 
 /* match info */
 struct ipt_ecn_info {
-       u_int8_t operation;
-       u_int8_t invert;
-       u_int8_t ip_ect;
+       __u8 operation;
+       __u8 invert;
+       __u8 ip_ect;
        union {
                struct {
-                       u_int8_t ect;
+                       __u8 ect;
                } tcp;
        } proto;
 };
index ee24fd8..93d9a06 100644 (file)
@@ -13,8 +13,8 @@ enum {
 
 
 struct ipt_ttl_info {
-       u_int8_t        mode;
-       u_int8_t        ttl;
+       __u8    mode;
+       __u8    ttl;
 };
 
 
index afb7813..81cdaf0 100644 (file)
@@ -14,8 +14,8 @@ enum {
 #define IP6T_HL_MAXMODE        IP6T_HL_DEC
 
 struct ip6t_HL_info {
-       u_int8_t        mode;
-       u_int8_t        hop_limit;
+       __u8    mode;
+       __u8    hop_limit;
 };
 
 
index 6be6504..b999aa4 100644 (file)
@@ -12,7 +12,7 @@ enum ip6t_reject_with {
 };
 
 struct ip6t_reject_info {
-       u_int32_t       with;   /* reject type */
+       __u32   with;   /* reject type */
 };
 
 #endif /*_IP6T_REJECT_H*/
index 17a745c..a602c16 100644 (file)
@@ -2,10 +2,10 @@
 #define _IP6T_AH_H
 
 struct ip6t_ah {
-       u_int32_t spis[2];                      /* Security Parameter Index */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  hdrres;                       /* Test of the Reserved Filed */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 spis[2];                  /* Security Parameter Index */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  hdrres;                   /* Test of the Reserved Field */
+       __u8  invflags;                 /* Inverse flags */
 };
 
 #define IP6T_AH_SPI 0x01
index 3724d08..538b31e 100644 (file)
@@ -2,10 +2,10 @@
 #define _IP6T_FRAG_H
 
 struct ip6t_frag {
-       u_int32_t ids[2];                       /* Security Parameter Index */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  flags;                        /*  */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 ids[2];                   /* Security Parameter Index */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  flags;                    /*  */
+       __u8  invflags;                 /* Inverse flags */
 };
 
 #define IP6T_FRAG_IDS          0x01
index 5ef91b8..c6fddcb 100644 (file)
@@ -14,8 +14,8 @@ enum {
 
 
 struct ip6t_hl_info {
-       u_int8_t        mode;
-       u_int8_t        hop_limit;
+       __u8    mode;
+       __u8    hop_limit;
 };
 
 
index 01dfd44..73d53bd 100644 (file)
@@ -9,9 +9,9 @@ on whether they contain certain headers */
 #define __IPV6HEADER_H
 
 struct ip6t_ipv6header_info {
-       u_int8_t matchflags;
-       u_int8_t invflags;
-       u_int8_t modeflag;
+       __u8 matchflags;
+       __u8 invflags;
+       __u8 modeflag;
 };
 
 #define MASK_HOPOPTS    128
index 18549bc..98c8cf6 100644 (file)
@@ -3,8 +3,8 @@
 
 /* MH matching stuff */
 struct ip6t_mh {
-       u_int8_t types[2];      /* MH type range */
-       u_int8_t invflags;      /* Inverse flags */
+       __u8 types[2];  /* MH type range */
+       __u8 invflags;  /* Inverse flags */
 };
 
 /* Values for "invflags" field in struct ip6t_mh. */
index 62d89bc..405d309 100644 (file)
@@ -4,11 +4,11 @@
 #define IP6T_OPTS_OPTSNR 16
 
 struct ip6t_opts {
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t flags;                         /*  */
-       u_int8_t invflags;                      /* Inverse flags */
-       u_int16_t opts[IP6T_OPTS_OPTSNR];       /* opts */
-       u_int8_t optsnr;                        /* Nr of OPts */
+       __u32 hdrlen;                   /* Header Length */
+       __u8 flags;                             /*  */
+       __u8 invflags;                  /* Inverse flags */
+       __u16 opts[IP6T_OPTS_OPTSNR];   /* opts */
+       __u8 optsnr;                    /* Nr of OPts */
 };
 
 #define IP6T_OPTS_LEN          0x01
index ab91bfd..e8dad20 100644 (file)
@@ -6,13 +6,13 @@
 #define IP6T_RT_HOPS 16
 
 struct ip6t_rt {
-       u_int32_t rt_type;                      /* Routing Type */
-       u_int32_t segsleft[2];                  /* Segments Left */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  flags;                        /*  */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 rt_type;                  /* Routing Type */
+       __u32 segsleft[2];                      /* Segments Left */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  flags;                    /*  */
+       __u8  invflags;                 /* Inverse flags */
        struct in6_addr addrs[IP6T_RT_HOPS];    /* Hops */
-       u_int8_t addrnr;                        /* Nr of Addresses */
+       __u8 addrnr;                    /* Nr of Addresses */
 };
 
 #define IP6T_RT_TYP            0x01
index 93b0310..be5a0d4 100644 (file)
@@ -72,7 +72,7 @@ struct dst_entry {
 
        u32                     _metrics[RTAX_MAX];
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        __u32                   tclassid;
 #else
        __u32                   __pad2;
index 07bdb5e..65d1fcd 100644 (file)
@@ -55,7 +55,7 @@ struct fib_nh {
        int                     nh_weight;
        int                     nh_power;
 #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        __u32                   nh_tclassid;
 #endif
        int                     nh_oif;
@@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp,
 extern int __net_init fib4_rules_init(struct net *net);
 extern void __net_exit fib4_rules_exit(struct net *net);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 extern u32 fib_rules_tclass(struct fib_result *res);
 #endif
 
@@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id);
 
 static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
 {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        u32 rtag;
 #endif
index b7bbd6c..b23bea6 100644 (file)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #endif
+#include <net/net_namespace.h>         /* Network namespace */
+
+/*
+ * Generic access of ipvs struct
+ */
+static inline struct netns_ipvs *net_ipvs(struct net* net)
+{
+       return net->ipvs;
+}
+/*
+ * Get the net pointer from an skb on the packet (traffic) path;
+ * use skb_sknet() when the call comes from userland (ioctl or netlink).
+ */
+static inline struct net *skb_net(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+       /*
+        * This is used for debug only.
+        * Start with the most likely hit
+        * End with BUG
+        */
+       if (likely(skb->dev && skb->dev->nd_net))
+               return dev_net(skb->dev);
+       if (skb_dst(skb)->dev)
+               return dev_net(skb_dst(skb)->dev);
+       WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
+                     __func__, __LINE__);
+       if (likely(skb->sk && skb->sk->sk_net))
+               return sock_net(skb->sk);
+       pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+               __func__, __LINE__);
+       BUG();
+#else
+       return dev_net(skb->dev ? : skb_dst(skb)->dev);
+#endif
+#else
+       return &init_net;
+#endif
+}
+
+static inline struct net *skb_sknet(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+       /* Start with the most likely hit */
+       if (likely(skb->sk && skb->sk->sk_net))
+               return sock_net(skb->sk);
+       WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
+                      __func__, __LINE__);
+       if (likely(skb->dev && skb->dev->nd_net))
+               return dev_net(skb->dev);
+       pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+               __func__, __LINE__);
+       BUG();
+#else
+       return sock_net(skb->sk);
+#endif
+#else
+       return &init_net;
+#endif
+}
+/*
+ * This one is needed for single_open_net() since net is stored directly in
+ * private, not inside a struct, i.e. seq_file_net() can't be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+       return (struct net *)seq->private;
+#else
+       return &init_net;
+#endif
+}
 
 /* Connections' size value needed by ip_vs_ctl.c */
 extern int ip_vs_conn_tab_size;
@@ -258,6 +332,23 @@ struct ip_vs_seq {
                                                   before last resized pkt */
 };
 
+/*
+ * counters per cpu
+ */
+struct ip_vs_counters {
+       __u32           conns;          /* connections scheduled */
+       __u32           inpkts;         /* incoming packets */
+       __u32           outpkts;        /* outgoing packets */
+       __u64           inbytes;        /* incoming bytes */
+       __u64           outbytes;       /* outgoing bytes */
+};
+/*
+ * Stats per cpu
+ */
+struct ip_vs_cpu_stats {
+       struct ip_vs_counters   ustats;
+       struct u64_stats_sync   syncp;
+};
 
 /*
  *     IPVS statistics objects
@@ -279,17 +370,34 @@ struct ip_vs_estimator {
 };
 
 struct ip_vs_stats {
-       struct ip_vs_stats_user ustats;         /* statistics */
+       struct ip_vs_stats_user ustats;         /* statistics */
        struct ip_vs_estimator  est;            /* estimator */
-
-       spinlock_t              lock;           /* spin lock */
+       struct ip_vs_cpu_stats  *cpustats;      /* per cpu counters */
+       spinlock_t              lock;           /* spin lock */
 };
 
+/*
+ * Helper Macros for per cpu
+ * ipvs->tot_stats->ustats.count
+ */
+#define IPVS_STAT_INC(ipvs, count)     \
+       __this_cpu_inc((ipvs)->ustats->count)
+
+#define IPVS_STAT_ADD(ipvs, count, value) \
+       do {\
+               write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
+                                    raw_smp_processor_id())); \
+               __this_cpu_add((ipvs)->ustats->count, value); \
+               write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
+                                  raw_smp_processor_id())); \
+       } while (0)
+
 struct dst_entry;
 struct iphdr;
 struct ip_vs_conn;
 struct ip_vs_app;
 struct sk_buff;
+struct ip_vs_proto_data;
 
 struct ip_vs_protocol {
        struct ip_vs_protocol   *next;
@@ -297,21 +405,22 @@ struct ip_vs_protocol {
        u16                     protocol;
        u16                     num_states;
        int                     dont_defrag;
-       atomic_t                appcnt;         /* counter of proto app incs */
-       int                     *timeout_table; /* protocol timeout table */
 
        void (*init)(struct ip_vs_protocol *pp);
 
        void (*exit)(struct ip_vs_protocol *pp);
 
+       void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+       void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
        int (*conn_schedule)(int af, struct sk_buff *skb,
-                            struct ip_vs_protocol *pp,
+                            struct ip_vs_proto_data *pd,
                             int *verdict, struct ip_vs_conn **cpp);
 
        struct ip_vs_conn *
        (*conn_in_get)(int af,
                       const struct sk_buff *skb,
-                      struct ip_vs_protocol *pp,
                       const struct ip_vs_iphdr *iph,
                       unsigned int proto_off,
                       int inverse);
@@ -319,7 +428,6 @@ struct ip_vs_protocol {
        struct ip_vs_conn *
        (*conn_out_get)(int af,
                        const struct sk_buff *skb,
-                       struct ip_vs_protocol *pp,
                        const struct ip_vs_iphdr *iph,
                        unsigned int proto_off,
                        int inverse);
@@ -337,11 +445,11 @@ struct ip_vs_protocol {
 
        int (*state_transition)(struct ip_vs_conn *cp, int direction,
                                const struct sk_buff *skb,
-                               struct ip_vs_protocol *pp);
+                               struct ip_vs_proto_data *pd);
 
-       int (*register_app)(struct ip_vs_app *inc);
+       int (*register_app)(struct net *net, struct ip_vs_app *inc);
 
-       void (*unregister_app)(struct ip_vs_app *inc);
+       void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
 
        int (*app_conn_bind)(struct ip_vs_conn *cp);
 
@@ -350,14 +458,26 @@ struct ip_vs_protocol {
                             int offset,
                             const char *msg);
 
-       void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
+       void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
+};
 
-       int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+       struct ip_vs_proto_data *next;
+       struct ip_vs_protocol   *pp;
+       int                     *timeout_table; /* protocol timeout table */
+       atomic_t                appcnt;         /* counter of proto app incs. */
+       struct tcp_states_t     *tcp_state_table;
 };
 
-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
+                                                    unsigned short proto);
 
 struct ip_vs_conn_param {
+       struct net                      *net;
        const union nf_inet_addr        *caddr;
        const union nf_inet_addr        *vaddr;
        __be16                          cport;
@@ -375,16 +495,19 @@ struct ip_vs_conn_param {
  */
 struct ip_vs_conn {
        struct list_head        c_list;         /* hashed list heads */
-
+#ifdef CONFIG_NET_NS
+       struct net              *net;           /* Name space */
+#endif
        /* Protocol, addresses and port numbers */
-       u16                      af;            /* address family */
-       union nf_inet_addr       caddr;          /* client address */
-       union nf_inet_addr       vaddr;          /* virtual address */
-       union nf_inet_addr       daddr;          /* destination address */
-       volatile __u32           flags;          /* status flags */
-       __be16                   cport;
-       __be16                   vport;
-       __be16                   dport;
+       u16                     af;             /* address family */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __u32                   fwmark;         /* Firewall mark from skb */
+       union nf_inet_addr      caddr;          /* client address */
+       union nf_inet_addr      vaddr;          /* virtual address */
+       union nf_inet_addr      daddr;          /* destination address */
+       volatile __u32          flags;          /* status flags */
        __u16                   protocol;       /* Which protocol (TCP/UDP) */
 
        /* counter and timer */
@@ -422,10 +545,38 @@ struct ip_vs_conn {
        struct ip_vs_seq        in_seq;         /* incoming seq. struct */
        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 
+       const struct ip_vs_pe   *pe;
        char                    *pe_data;
        __u8                    pe_data_len;
 };
 
+/*
+ *  To save some memory in the conn table when namespaces are disabled.
+ */
+static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
+{
+#ifdef CONFIG_NET_NS
+       return cp->net;
+#else
+       return &init_net;
+#endif
+}
+static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+       cp->net = net;
+#endif
+}
+
+static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
+                                   struct net *net)
+{
+#ifdef CONFIG_NET_NS
+       return cp->net == net;
+#else
+       return 1;
+#endif
+}
 
 /*
  *     Extended internal versions of struct ip_vs_service_user and
@@ -485,6 +636,7 @@ struct ip_vs_service {
        unsigned                flags;    /* service status flags */
        unsigned                timeout;  /* persistent timeout in ticks */
        __be32                  netmask;  /* grouping granularity */
+       struct net              *net;
 
        struct list_head        destinations;  /* real server d-linked list */
        __u32                   num_dests;     /* number of servers */
@@ -510,8 +662,8 @@ struct ip_vs_dest {
        struct list_head        d_list;   /* for table with all the dests */
 
        u16                     af;             /* address family */
-       union nf_inet_addr      addr;           /* IP address of the server */
        __be16                  port;           /* port number of the server */
+       union nf_inet_addr      addr;           /* IP address of the server */
        volatile unsigned       flags;          /* dest status flags */
        atomic_t                conn_flags;     /* flags to copy to conn */
        atomic_t                weight;         /* server weight */
@@ -538,8 +690,8 @@ struct ip_vs_dest {
        /* for virtual service */
        struct ip_vs_service    *svc;           /* service it belongs to */
        __u16                   protocol;       /* which protocol (TCP/UDP) */
-       union nf_inet_addr      vaddr;          /* virtual IP address */
        __be16                  vport;          /* virtual port number */
+       union nf_inet_addr      vaddr;          /* virtual IP address */
        __u32                   vfwmark;        /* firewall mark of service */
 };
 
@@ -674,13 +826,14 @@ enum {
        IP_VS_DIR_LAST,
 };
 
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
                                         const union nf_inet_addr *caddr,
                                         __be16 cport,
                                         const union nf_inet_addr *vaddr,
                                         __be16 vport,
                                         struct ip_vs_conn_param *p)
 {
+       p->net = net;
        p->af = af;
        p->protocol = protocol;
        p->caddr = caddr;
@@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
 struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
 
 struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-                                           struct ip_vs_protocol *pp,
                                            const struct ip_vs_iphdr *iph,
                                            unsigned int proto_off,
                                            int inverse);
@@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
 struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
 
 struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-                                            struct ip_vs_protocol *pp,
                                             const struct ip_vs_iphdr *iph,
                                             unsigned int proto_off,
                                             int inverse);
@@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
                                  const union nf_inet_addr *daddr,
                                  __be16 dport, unsigned flags,
-                                 struct ip_vs_dest *dest);
+                                 struct ip_vs_dest *dest, __u32 fwmark);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
 extern const char * ip_vs_state_name(__u16 proto, int state);
 
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
 extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_random_dropentry(struct net *net);
 extern int ip_vs_conn_init(void);
 extern void ip_vs_conn_cleanup(void);
 
@@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
 extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
+                                 __u16 proto, __u16 port);
 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
 
@@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
 void ip_vs_unbind_pe(struct ip_vs_service *svc);
 int register_ip_vs_pe(struct ip_vs_pe *pe);
 int unregister_ip_vs_pe(struct ip_vs_pe *pe);
-extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
-extern void ip_vs_pe_put(struct ip_vs_pe *pe);
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
+
+static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               __module_get(pe->module);
+}
+
+static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               module_put(pe->module);
+}
 
 /*
  *     IPVS protocol functions (from ip_vs_proto.c)
  */
 extern int ip_vs_protocol_init(void);
 extern void ip_vs_protocol_cleanup(void);
-extern void ip_vs_protocol_timeout_change(int flags);
+extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
 extern int *ip_vs_create_timeout_table(int *table, int size);
 extern int
 ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
 extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
 extern struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-              struct ip_vs_protocol *pp, int *ignored);
+              struct ip_vs_proto_data *pd, int *ignored);
 extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-                       struct ip_vs_protocol *pp);
+                       struct ip_vs_proto_data *pd);
 
 
 /*
  *      IPVS control data and functions (from ip_vs_ctl.c)
  */
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
 extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];
+extern int sysctl_ip_vs_sync_ver;
 
+extern void ip_vs_sync_switch_mode(struct net *net, int mode);
 extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                  const union nf_inet_addr *vaddr, __be16 vport);
 
 static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }
 
 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                          const union nf_inet_addr *daddr, __be16 dport);
 
 extern int ip_vs_use_count_inc(void);
@@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
-               const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
+               __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+               __u16 protocol, __u32 fwmark);
 extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 
@@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
  *      IPVS sync daemon data and function prototypes
  *      (from ip_vs_sync.c)
  */
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+                            __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);
 
 
 /*
@@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
  */
 extern int ip_vs_estimator_init(void);
 extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
 
 /*
@@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
 extern int ip_vs_drop_rate;
 extern int ip_vs_drop_counter;
 
-static __inline__ int ip_vs_todrop(void)
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
 {
-       if (!ip_vs_drop_rate) return 0;
-       if (--ip_vs_drop_counter > 0) return 0;
-       ip_vs_drop_counter = ip_vs_drop_rate;
+       if (!ipvs->drop_rate)
+               return 0;
+       if (--ipvs->drop_counter > 0)
+               return 0;
+       ipvs->drop_counter = ipvs->drop_rate;
        return 1;
 }
 
@@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
  *      Netfilter connection tracking
  *      (from ip_vs_nfct.c)
  */
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 {
-       return sysctl_ip_vs_conntrack;
+       return ipvs->sysctl_conntrack;
 }
 
 extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
 
 #else
 
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 {
        return 0;
 }
index 1bf812b..b3b4a34 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/netns/conntrack.h>
 #endif
 #include <net/netns/xfrm.h>
+#include <net/netns/ip_vs.h>
 
 struct proc_dir_entry;
 struct net_device;
@@ -94,6 +95,7 @@ struct net {
 #ifdef CONFIG_XFRM
        struct netns_xfrm       xfrm;
 #endif
+       struct netns_ipvs       *ipvs;
 };
 
 
index d85cff1..d0d1337 100644 (file)
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto {
 /* per conntrack: application helper private data */
 union nf_conntrack_help {
        /* insert conntrack helper private data (master) here */
+#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
        struct nf_ct_ftp_master ct_ftp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
+    defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
        struct nf_ct_pptp_master ct_pptp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_H323) || \
+    defined(CONFIG_NF_CONNTRACK_H323_MODULE)
        struct nf_ct_h323_master ct_h323_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SANE) || \
+    defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
        struct nf_ct_sane_master ct_sane_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
        struct nf_ct_sip_master ct_sip_info;
+#endif
 };
 
 #include <linux/types.h>
@@ -116,14 +129,14 @@ struct nf_conn {
        u_int32_t secmark;
 #endif
 
-       /* Storage reserved for other modules: */
-       union nf_conntrack_proto proto;
-
        /* Extensions */
        struct nf_ct_ext *ext;
 #ifdef CONFIG_NET_NS
        struct net *ct_net;
 #endif
+
+       /* Storage reserved for other modules, must be the last member */
+       union nf_conntrack_proto proto;
 };
 
 static inline struct nf_conn *
@@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
  * Allocate a hashtable of hlist_head (if nulls == 0),
  * or hlist_nulls_head (if nulls == 1)
  */
-extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
 
-extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
+extern void nf_ct_free_hashtable(void *hash, unsigned int size);
 
 extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, u16 zone,
index 96ba5f7..8fdb04b 100644 (file)
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache {
 static inline struct nf_conntrack_ecache *
 nf_ct_ecache_find(const struct nf_conn *ct)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+#else
+       return NULL;
+#endif
 }
 
 static inline struct nf_conntrack_ecache *
 nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        struct net *net = nf_ct_net(ct);
        struct nf_conntrack_ecache *e;
 
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
                e->expmask = expmask;
        }
        return e;
+#else
+       return NULL;
+#endif
 };
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier {
        int (*fcn)(unsigned int events, struct nf_ct_event *item);
 };
 
-extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
 extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
 extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier {
        int (*fcn)(unsigned int events, struct nf_exp_event *item);
 };
 
-extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
 extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
 extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
 
index 0772d29..2dcf317 100644 (file)
@@ -7,10 +7,19 @@
 
 enum nf_ct_ext_id {
        NF_CT_EXT_HELPER,
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
        NF_CT_EXT_NAT,
+#endif
        NF_CT_EXT_ACCT,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        NF_CT_EXT_ECACHE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
        NF_CT_EXT_ZONE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       NF_CT_EXT_TSTAMP,
+#endif
        NF_CT_EXT_NUM,
 };
 
@@ -19,6 +28,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
 #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
index 32c305d..f1c1311 100644 (file)
@@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 extern int nf_conntrack_helper_init(void);
 extern void nf_conntrack_helper_fini(void);
 
+extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
+                                      unsigned int protoff,
+                                      struct nf_conn *ct,
+                                      enum ip_conntrack_info ctinfo,
+                                      unsigned int timeout);
+
 #endif /*_NF_CONNTRACK_HELPER_H*/
index a754761..e8010f4 100644 (file)
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto {
        struct module *me;
 };
 
-extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
 
 /* Protocol registration. */
 extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
new file mode 100644 (file)
index 0000000..f17dcb6
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef _NF_CONNTRACK_TSTAMP_H
+#define _NF_CONNTRACK_TSTAMP_H
+
+#include <net/net_namespace.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_tstamp {
+       u_int64_t start;
+       u_int64_t stop;
+};
+
+static inline
+struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
+#else
+       return NULL;
+#endif
+}
+
+static inline
+struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       struct net *net = nf_ct_net(ct);
+
+       if (!net->ct.sysctl_tstamp)
+               return NULL;
+
+       return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
+#else
+       return NULL;
+#endif
+};
+
+static inline bool nf_ct_tstamp_enabled(struct net *net)
+{
+       return net->ct.sysctl_tstamp != 0;
+}
+
+static inline void nf_ct_set_tstamp(struct net *net, bool enable)
+{
+       net->ct.sysctl_tstamp = enable;
+}
+
+extern int nf_conntrack_tstamp_init(struct net *net);
+extern void nf_conntrack_tstamp_fini(struct net *net);
+
+#endif /* _NF_CONNTRACK_TSTAMP_H */
index f5f09f0..aff80b1 100644 (file)
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat {
 /* per conntrack: nat application helper private data */
 union nf_conntrack_nat_help {
        /* insert nat helper private data here */
+#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
        struct nf_nat_pptp nat_pptp_info;
+#endif
 };
 
 struct nf_conn;
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 
 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 {
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
        return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+#else
+       return NULL;
+#endif
 }
 
 #else  /* !__KERNEL__: iptables wants this to compile. */
index 33602ab..3dc7b98 100644 (file)
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
                                     enum nf_nat_manip_type manip)
 {
        if (manip == IP_NAT_MANIP_SRC)
-               return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_SRC_NAT_DONE;
        else
-               return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_DST_NAT_DONE;
 }
 
 struct nlattr;
index d4958d4..341eb08 100644 (file)
@@ -21,15 +21,15 @@ struct netns_ct {
        int                     sysctl_events;
        unsigned int            sysctl_events_retry_timeout;
        int                     sysctl_acct;
+       int                     sysctl_tstamp;
        int                     sysctl_checksum;
        unsigned int            sysctl_log_invalid; /* Log invalid packets */
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *sysctl_header;
        struct ctl_table_header *acct_sysctl_header;
+       struct ctl_table_header *tstamp_sysctl_header;
        struct ctl_table_header *event_sysctl_header;
 #endif
-       int                     hash_vmalloc;
-       int                     expect_vmalloc;
        char                    *slabname;
 };
 #endif
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644 (file)
index 0000000..259ebac
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ *  IP Virtual Server
+ *  Data structure for network namespace
+ *
+ */
+
+#ifndef IP_VS_H_
+#define IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+       int                     gen;            /* Generation */
+       /*
+        *      Hash table: for real service lookups
+        */
+       #define IP_VS_RTAB_BITS 4
+       #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+       #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+       struct list_head        rs_table[IP_VS_RTAB_SIZE];
+       /* ip_vs_app */
+       struct list_head        app_list;
+       struct mutex            app_mutex;
+       struct lock_class_key   app_key;        /* mutex debugging */
+
+       /* ip_vs_proto */
+       #define IP_VS_PROTO_TAB_SIZE    32      /* must be power of 2 */
+       struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+       /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+       #define TCP_APP_TAB_BITS        4
+       #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
+       #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
+       struct list_head        tcp_apps[TCP_APP_TAB_SIZE];
+       spinlock_t              tcp_app_lock;
+#endif
+       /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+       #define UDP_APP_TAB_BITS        4
+       #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
+       #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
+       struct list_head        udp_apps[UDP_APP_TAB_SIZE];
+       spinlock_t              udp_app_lock;
+#endif
+       /* ip_vs_proto_sctp */
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+       #define SCTP_APP_TAB_BITS       4
+       #define SCTP_APP_TAB_SIZE       (1 << SCTP_APP_TAB_BITS)
+       #define SCTP_APP_TAB_MASK       (SCTP_APP_TAB_SIZE - 1)
+       /* Hash table for SCTP application incarnations  */
+       struct list_head        sctp_apps[SCTP_APP_TAB_SIZE];
+       spinlock_t              sctp_app_lock;
+#endif
+       /* ip_vs_conn */
+       atomic_t                conn_count;      /*  connection counter */
+
+       /* ip_vs_ctl */
+       struct ip_vs_stats              *tot_stats;  /* Statistics & est. */
+       struct ip_vs_cpu_stats __percpu *cpustats;   /* Stats per cpu */
+       seqcount_t                      *ustats_seq; /* u64 read retry */
+
+       int                     num_services;    /* no of virtual services */
+       /* 1/rate drop and drop-entry variables */
+       struct delayed_work     defense_work;   /* Work handler */
+       int                     drop_rate;
+       int                     drop_counter;
+       atomic_t                dropentry;
+       /* locks in ctl.c */
+       spinlock_t              dropentry_lock;  /* drop entry handling */
+       spinlock_t              droppacket_lock; /* drop packet handling */
+       spinlock_t              securetcp_lock;  /* state and timeout tables */
+       rwlock_t                rs_lock;         /* real services table */
+       /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+       struct lock_class_key   ctl_key;        /* ctl_mutex debugging */
+       /* Trash for destinations */
+       struct list_head        dest_trash;
+       /* Service counters */
+       atomic_t                ftpsvc_counter;
+       atomic_t                nullsvc_counter;
+
+       /* sys-ctl struct */
+       struct ctl_table_header *sysctl_hdr;
+       struct ctl_table        *sysctl_tbl;
+       /* sysctl variables */
+       int                     sysctl_amemthresh;
+       int                     sysctl_am_droprate;
+       int                     sysctl_drop_entry;
+       int                     sysctl_drop_packet;
+       int                     sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+       int                     sysctl_conntrack;
+#endif
+       int                     sysctl_snat_reroute;
+       int                     sysctl_sync_ver;
+       int                     sysctl_cache_bypass;
+       int                     sysctl_expire_nodest_conn;
+       int                     sysctl_expire_quiescent_template;
+       int                     sysctl_sync_threshold[2];
+       int                     sysctl_nat_icmp_send;
+
+       /* ip_vs_lblc */
+       int                     sysctl_lblc_expiration;
+       struct ctl_table_header *lblc_ctl_header;
+       struct ctl_table        *lblc_ctl_table;
+       /* ip_vs_lblcr */
+       int                     sysctl_lblcr_expiration;
+       struct ctl_table_header *lblcr_ctl_header;
+       struct ctl_table        *lblcr_ctl_table;
+       /* ip_vs_est */
+       struct list_head        est_list;       /* estimator list */
+       spinlock_t              est_lock;
+       struct timer_list       est_timer;      /* Estimation timer */
+       /* ip_vs_sync */
+       struct list_head        sync_queue;
+       spinlock_t              sync_lock;
+       struct ip_vs_sync_buff  *sync_buff;
+       spinlock_t              sync_buff_lock;
+       struct sockaddr_in      sync_mcast_addr;
+       struct task_struct      *master_thread;
+       struct task_struct      *backup_thread;
+       int                     send_mesg_maxlen;
+       int                     recv_mesg_maxlen;
+       volatile int            sync_state;
+       volatile int            master_syncid;
+       volatile int            backup_syncid;
+       /* multicast interface name */
+       char                    master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+       char                    backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+       /* net name space ptr */
+       struct net              *net;            /* Needed by timer routines */
+};
+
+#endif /* IP_VS_H_ */
index d68c3f1..e2e2ef5 100644 (file)
@@ -43,7 +43,6 @@ struct netns_ipv4 {
        struct xt_table         *nat_table;
        struct hlist_head       *nat_bysource;
        unsigned int            nat_htable_size;
-       int                     nat_vmalloced;
 #endif
 
        int sysctl_icmp_echo_ignore_all;
index e495624..162e88e 100644 (file)
@@ -74,6 +74,8 @@ static int    audit_initialized;
 int            audit_enabled;
 int            audit_ever_enabled;
 
+EXPORT_SYMBOL_GPL(audit_enabled);
+
 /* Default state when kernel boots without any parameters. */
 static int     audit_default;
 
index 50a46af..2ed0056 100644 (file)
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip6.h>
 
-struct tcpudphdr {
-       __be16 src;
-       __be16 dst;
+union pkthdr {
+       struct {
+               __be16 src;
+               __be16 dst;
+       } tcpudphdr;
+       struct {
+               u8 type;
+               u8 code;
+       } icmphdr;
 };
 
 static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
        const struct ebt_ip6_info *info = par->matchinfo;
        const struct ipv6hdr *ih6;
        struct ipv6hdr _ip6h;
-       const struct tcpudphdr *pptr;
-       struct tcpudphdr _ports;
+       const union pkthdr *pptr;
+       union pkthdr _pkthdr;
 
        ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
        if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
                        return false;
                if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
                        return false;
-               if (!(info->bitmask & EBT_IP6_DPORT) &&
-                   !(info->bitmask & EBT_IP6_SPORT))
+               if (!(info->bitmask & ( EBT_IP6_DPORT |
+                                       EBT_IP6_SPORT | EBT_IP6_ICMP6)))
                        return true;
-               pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
-                                         &_ports);
+
+               /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
+               pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+                                         &_pkthdr);
                if (pptr == NULL)
                        return false;
                if (info->bitmask & EBT_IP6_DPORT) {
-                       u32 dst = ntohs(pptr->dst);
+                       u16 dst = ntohs(pptr->tcpudphdr.dst);
                        if (FWINV(dst < info->dport[0] ||
                                  dst > info->dport[1], EBT_IP6_DPORT))
                                return false;
                }
                if (info->bitmask & EBT_IP6_SPORT) {
-                       u32 src = ntohs(pptr->src);
+                       u16 src = ntohs(pptr->tcpudphdr.src);
                        if (FWINV(src < info->sport[0] ||
                                  src > info->sport[1], EBT_IP6_SPORT))
                        return false;
                }
-               return true;
+               if ((info->bitmask & EBT_IP6_ICMP6) &&
+                    FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+                          pptr->icmphdr.type > info->icmpv6_type[1] ||
+                          pptr->icmphdr.code < info->icmpv6_code[0] ||
+                          pptr->icmphdr.code > info->icmpv6_code[1],
+                                                       EBT_IP6_ICMP6))
+                       return false;
        }
        return true;
 }
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
                return -EINVAL;
        if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
                return -EINVAL;
+       if (info->bitmask & EBT_IP6_ICMP6) {
+               if ((info->invflags & EBT_IP6_PROTO) ||
+                    info->protocol != IPPROTO_ICMPV6)
+                       return -EINVAL;
+               if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+                   info->icmpv6_code[0] > info->icmpv6_code[1])
+                       return -EINVAL;
+       }
        return 0;
 }
 
index 16df053..5f1825d 100644 (file)
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
 
        newinfo->entries_size = size;
 
+       xt_compat_init_offsets(AF_INET, info->nentries);
        return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
                                                        entries, newinfo);
 }
index a5a1050..8949a05 100644 (file)
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE
          handled by the klogd daemon which is responsible for kernel messages
          ("man klogd").
 
+config IP_ROUTE_CLASSID
+       bool
+
 config IP_PNP
        bool "IP: kernel level autoconfiguration"
        help
@@ -657,4 +660,3 @@ config TCP_MD5SIG
          on the Internet.
 
          If unsure, say N.
-
index 7981a24..9cefe72 100644 (file)
@@ -41,12 +41,12 @@ struct fib4_rule {
        __be32                  srcmask;
        __be32                  dst;
        __be32                  dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        u32                     tclassid;
 #endif
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 u32 fib_rules_tclass(struct fib_result *res)
 {
        return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        if (frh->dst_len)
                rule4->dst = nla_get_be32(tb[FRA_DST]);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (tb[FRA_FLOW])
                rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
        if (frh->tos && (rule4->tos != frh->tos))
                return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
                return 0;
 #endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
        if (rule4->src_len)
                NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (rule4->tclassid)
                NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
 #endif
index 12d3dc3..9aff11d 100644 (file)
@@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
                    nh->nh_weight != onh->nh_weight ||
 #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
                    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 
                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
                        nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                        nla = nla_find(attrs, attrlen, RTA_FLOW);
                        nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
@@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
                        if (nla && nla_get_be32(nla) != nh->nh_gw)
                                return 1;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                        nla = nla_find(attrs, attrlen, RTA_FLOW);
                        if (nla && nla_get_u32(nla) != nh->nh_tclassid)
                                return 1;
@@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
                        goto err_inval;
                if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
                        goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
                        goto err_inval;
 #endif
@@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
                nh->nh_oif = cfg->fc_oif;
                nh->nh_gw = cfg->fc_gw;
                nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                nh->nh_tclassid = cfg->fc_flow;
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
                if (fi->fib_nh->nh_oif)
                        NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                if (fi->fib_nh[0].nh_tclassid)
                        NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
@@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
                        if (nh->nh_gw)
                                NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                        if (nh->nh_tclassid)
                                NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
index d859bcc..d7b2b09 100644 (file)
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
                }
        }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (unlikely(skb_dst(skb)->tclassid)) {
                struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
                u32 idx = skb_dst(skb)->tclassid;
index babd1a2..f926a31 100644 (file)
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
 
 config NF_NAT_SNMP_BASIC
        tristate "Basic SNMP-ALG support"
-       depends on NF_NAT
+       depends on NF_CONNTRACK_SNMP && NF_NAT
        depends on NETFILTER_ADVANCED
+       default NF_NAT && NF_CONNTRACK_SNMP
        ---help---
 
          This module implements an Application Layer Gateway (ALG) for
index e855fff..e95054c 100644 (file)
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(NFPROTO_ARP, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                if (ret != 0)
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;
        xt_compat_lock(NFPROTO_ARP);
+       xt_compat_init_offsets(NFPROTO_ARP, number);
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter0, entry0, total_size) {
                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
index 652efea..ef7d7b9 100644 (file)
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(AF_INET, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                if (ret != 0)
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;
        xt_compat_lock(AF_INET);
+       xt_compat_init_offsets(AF_INET, number);
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter0, entry0, total_size) {
                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
index 1e26a48..403ca57 100644 (file)
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
         * that the ->target() function isn't called after ->destroy() */
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct == NULL) {
-               pr_info("no conntrack!\n");
-                       /* FIXME: need to drop invalid ones, since replies
-                        * to outgoing connections of other nodes will be
-                        * marked as INVALID */
+       if (ct == NULL)
                return NF_DROP;
-       }
 
        /* special case: ICMP error handling. conntrack distinguishes between
         * error messages (RELATED) and information requests (see below) */
index 72ffc8f..d76d6c9 100644 (file)
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
        }
 #endif
 
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                dump_mac_header(m, loginfo, skb);
 
        dump_packet(m, loginfo, skb, 0);
index 63f60fc..5585980 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
 
 struct ct_iter_state {
        struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
        for (st->bucket = 0;
             st->bucket < net->ct.htable_size;
             st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                if (!is_a_nulls(n))
                        return n;
        }
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
        while (is_a_nulls(head)) {
                if (likely(get_nulls_value(head) == st->bucket)) {
                        if (++st->bucket >= net->ct.htable_size)
                                return NULL;
                }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
        }
        return head;
 }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
        struct hlist_node *n;
 
        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
 }
index 0f23b3f..703f366 100644 (file)
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
 
        /* Try to get same port: if not, try to change it. */
        for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-               int ret;
+               int res;
 
                exp->tuple.dst.u.tcp.port = htons(port);
-               ret = nf_ct_expect_related(exp);
-               if (ret == 0)
+               res = nf_ct_expect_related(exp);
+               if (res == 0)
                        break;
-               else if (ret != -EBUSY) {
+               else if (res != -EBUSY) {
                        port = 0;
                        break;
                }
index c04787c..3002c04 100644 (file)
@@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
        /* It's done. */
        if (maniptype == IP_NAT_MANIP_DST)
-               set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_DST_NAT_DONE;
        else
-               set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_SRC_NAT_DONE;
 
        return NF_ACCEPT;
 }
@@ -502,7 +502,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
        int ret = 0;
 
        spin_lock_bh(&nf_nat_lock);
-       if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+       if (rcu_dereference_protected(
+                       nf_nat_protos[proto->protonum],
+                       lockdep_is_held(&nf_nat_lock)
+                       ) != &nf_nat_unknown_protocol) {
                ret = -EBUSY;
                goto out;
        }
@@ -679,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net)
 {
        /* Leave them the same for the moment. */
        net->ipv4.nat_htable_size = net->ct.htable_size;
-       net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
-                                                      &net->ipv4.nat_vmalloced, 0);
+       net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
        if (!net->ipv4.nat_bysource)
                return -ENOMEM;
        return 0;
@@ -702,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
        nf_ct_iterate_cleanup(net, &clean_nat, NULL);
        synchronize_rcu();
-       nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-                            net->ipv4.nat_htable_size);
+       nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
index ee5f419..8812a02 100644 (file)
@@ -54,6 +54,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
 {
        int ret = 0;
 
-       ret = nf_conntrack_helper_register(&snmp_helper);
-       if (ret < 0)
-               return ret;
+       BUG_ON(nf_nat_snmp_hook != NULL);
+       rcu_assign_pointer(nf_nat_snmp_hook, help);
+
        ret = nf_conntrack_helper_register(&snmp_trap_helper);
        if (ret < 0) {
                nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
 
 static void __exit nf_nat_snmp_basic_fini(void)
 {
-       nf_conntrack_helper_unregister(&snmp_helper);
+       rcu_assign_pointer(nf_nat_snmp_hook, NULL);
        nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
index 351dc4e..3e5b7cc 100644 (file)
@@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = {
        .release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
        struct ip_rt_acct *dst, *src;
@@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
        if (!pde)
                goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
        if (!pde)
                goto err3;
 #endif
        return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
        remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
        remove_proc_entry("rt_cache", net->proc_net_stat);
        remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
        memcpy(addr, &src, 4);
 }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
        if (!(rt->dst.tclassid & 0xFFFF))
@@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
                    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
                        rt->rt_gateway = FIB_RES_GW(*res);
                dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
        }
@@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
        if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
                dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        set_class_tag(rt, fib_rules_tclass(res));
 #endif
@@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        rth->fl.mark    = skb->mark;
        rth->fl.fl4_src = saddr;
        rth->rt_src     = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        rth->dst.tclassid = itag;
 #endif
        rth->rt_iif     =
@@ -2208,7 +2208,7 @@ local_input:
        rth->fl.mark    = skb->mark;
        rth->fl.fl4_src = saddr;
        rth->rt_src     = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        rth->dst.tclassid = itag;
 #endif
        rth->rt_iif     =
@@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net,
        }
        if (rt->dst.dev)
                NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (rt->dst.tclassid)
                NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
@@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 };
 
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
 
 static __initdata unsigned long rhash_entries;
 static int __init set_rhash_entries(char *str)
@@ -3267,7 +3267,7 @@ int __init ip_rt_init(void)
 {
        int rc = 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
        if (!ip_rt_acct)
                panic("IP: failed to allocate ip_rt_acct\n");
index 7d227c6..47b7b8d 100644 (file)
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(AF_INET6, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                if (ret != 0)
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;
        xt_compat_lock(AF_INET6);
+       xt_compat_init_offsets(AF_INET6, number);
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter0, entry0, total_size) {
                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
index 09c8889..05027b7 100644 (file)
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
               in ? in->name : "",
               out ? out->name : "");
 
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                dump_mac_header(m, loginfo, skb);
 
        dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
index 79d43aa..66e003e 100644 (file)
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_frag6_timeout",
                .data           = &nf_init_frags.timeout,
index 1534f2b..faf7412 100644 (file)
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
 
          If unsure, say `N'.
 
+config NF_CONNTRACK_TIMESTAMP
+       bool  'Connection tracking timestamping'
+       depends on NETFILTER_ADVANCED
+       help
+         This option enables support for connection tracking timestamping.
+         This allows you to store the flow start-time and to obtain
+         the flow-stop time (once it has been destroyed) via Connection
+         tracking events.
+
+         If unsure, say `N'.
+
 config NF_CT_PROTO_DCCP
        tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
        depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_BROADCAST
+       tristate
+
 config NF_CONNTRACK_NETBIOS_NS
        tristate "NetBIOS name service protocol support"
        depends on NETFILTER_ADVANCED
+       select NF_CONNTRACK_BROADCAST
        help
          NetBIOS name service requests are sent as broadcast messages from an
          unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_SNMP
+       tristate "SNMP service protocol support"
+       depends on NETFILTER_ADVANCED
+       select NF_CONNTRACK_BROADCAST
+       help
+         SNMP service requests are sent as broadcast messages from an
+         unprivileged port and responded to with unicast messages to the
+         same port. This makes them hard to firewall properly because connection
+         tracking doesn't deal with broadcasts. This helper tracks locally
+         originating SNMP service requests and the corresponding
+         responses. It relies on correct IP address configuration, specifically
+         netmask and broadcast address.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 config NF_CONNTRACK_PPTP
        tristate "PPtP protocol support"
        depends on NETFILTER_ADVANCED
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK
 
 comment "Xtables targets"
 
+config NETFILTER_XT_TARGET_AUDIT
+       tristate "AUDIT target support"
+       depends on AUDIT
+       depends on NETFILTER_ADVANCED
+       ---help---
+         This option adds an 'AUDIT' target, which can be used to create
+         audit records for packets dropped/accepted.
+
+         To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_CHECKSUM
        tristate "CHECKSUM target support"
        depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG
 config NETFILTER_XT_TARGET_NFQUEUE
        tristate '"NFQUEUE" target Support'
        depends on NETFILTER_ADVANCED
+       select NETFILTER_NETLINK_QUEUE
        help
          This target replaced the old obsolete QUEUE target.
 
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST
 config NETFILTER_XT_MATCH_REALM
        tristate  '"realm" match support'
        depends on NETFILTER_ADVANCED
-       select NET_CLS_ROUTE
+       select IP_ROUTE_CLASSID
        help
          This option adds a `realm' match, which allows you to use the realm
          key from the routing subsystem inside iptables.
index 441050f..9ae6878 100644 (file)
@@ -1,6 +1,7 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
 nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
 obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
 obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
 obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
 obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
 obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
 obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
index 32fcbe2..1e00bf7 100644 (file)
@@ -175,13 +175,21 @@ next_hook:
                ret = 1;
        } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
                kfree_skb(skb);
-               ret = -(verdict >> NF_VERDICT_BITS);
+               ret = NF_DROP_GETERR(verdict);
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-                             verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+                              verdict >> NF_VERDICT_QBITS);
+               if (ret < 0) {
+                       if (ret == -ECANCELED)
+                               goto next_hook;
+                       if (ret == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
+               ret = 0;
        }
        rcu_read_unlock();
        return ret;
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable);
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
 EXPORT_SYMBOL(ip_ct_attach);
 
 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nf_ct_attach);
 
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 EXPORT_SYMBOL(nf_ct_destroy);
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct)
index a475ede..5c48ffb 100644 (file)
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
 /*
  *     Get an ip_vs_app object
  */
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
  *     Allocate/initialize app incarnation and register it in proto apps.
  */
 static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+                 __u16 port)
 {
        struct ip_vs_protocol *pp;
        struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
                }
        }
 
-       ret = pp->register_app(inc);
+       ret = pp->register_app(net, inc);
        if (ret)
                goto out;
 
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
  *     Release app incarnation
  */
 static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
 {
        struct ip_vs_protocol *pp;
 
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
                return;
 
        if (pp->unregister_app)
-               pp->unregister_app(inc);
+               pp->unregister_app(net, inc);
 
        IP_VS_DBG(9, "%s App %s:%u unregistered\n",
                  pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
  *     Register an application incarnation in protocol applications
  */
 int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+                      __u16 port)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        int result;
 
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
 
-       result = ip_vs_app_inc_new(app, proto, port);
+       result = ip_vs_app_inc_new(net, app, proto, port);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
 
        return result;
 }
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
 /*
  *     ip_vs_app registration routine
  */
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        /* increase the module use count */
        ip_vs_use_count_inc();
 
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
 
-       list_add(&app->a_list, &ip_vs_app_list);
+       list_add(&app->a_list, &ipvs->app_list);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
 
        return 0;
 }
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
  *     ip_vs_app unregistration routine
  *     We are sure there are no app incarnations attached to services
  */
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_app *inc, *nxt;
 
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
 
        list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-               ip_vs_app_inc_release(inc);
+               ip_vs_app_inc_release(net, inc);
        }
 
        list_del(&app->a_list);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
 
        /* decrease the module use count */
        ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 /*
  *     Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+                  struct ip_vs_protocol *pp)
 {
        return pp->app_conn_bind(cp);
 }
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
  *     /proc/net/ip_vs_app entry function
  */
 
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 {
        struct ip_vs_app *app, *inc;
 
-       list_for_each_entry(app, &ip_vs_app_list, a_list) {
+       list_for_each_entry(app, &ipvs->app_list, a_list) {
                list_for_each_entry(inc, &app->incs_list, a_list) {
                        if (pos-- == 0)
                                return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
 
 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       mutex_lock(&__ip_vs_app_mutex);
+       struct net *net = seq_file_net(seq);
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+       mutex_lock(&ipvs->app_mutex);
+
+       return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct ip_vs_app *inc, *app;
        struct list_head *e;
+       struct net *net = seq_file_net(seq);
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        ++*pos;
        if (v == SEQ_START_TOKEN)
-               return ip_vs_app_idx(0);
+               return ip_vs_app_idx(ipvs, 0);
 
        inc = v;
        app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
                return list_entry(e, struct ip_vs_app, a_list);
 
        /* go on to next application */
-       for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+       for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
                app = list_entry(e, struct ip_vs_app, a_list);
                list_for_each_entry(inc, &app->incs_list, a_list) {
                        return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 {
-       mutex_unlock(&__ip_vs_app_mutex);
+       struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+       mutex_unlock(&ipvs->app_mutex);
 }
 
 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
 
 static int ip_vs_app_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &ip_vs_app_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+                           sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
 {
-       /* we will replace it with proc_net_ipvs_create() soon */
-       proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->app_list);
+       __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
+       proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
        return 0;
 }
 
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+       proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+       .init = __ip_vs_app_init,
+       .exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_app_ops);
+       return rv;
+}
+
 
 void ip_vs_app_cleanup(void)
 {
-       proc_net_remove(&init_net, "ip_vs_app");
+       unregister_pernet_subsys(&ip_vs_app_ops);
 }
index e9adecd..83233fe 100644 (file)
 /*
  * Connection hash size. Default is what was selected at compile time.
 */
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
 
 /* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
 
 /*
  *  Connection hash table: for input and output packets lookups of IPVS
  */
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
 
 /*  SLAB cache for IPVS connections */
 static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
 
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
 /*  counter for no client port connections */
 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
 
 /* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
 
 /*
  *  Fine locking granularity for big connection hash table
  */
-#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_BITS  5
 #define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
 #define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
 
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *     Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
                                       const union nf_inet_addr *addr,
                                       __be16 port)
 {
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6)
-               return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
-                                   (__force u32)port, proto, ip_vs_conn_rnd)
-                       & ip_vs_conn_tab_mask;
+               return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+                                   (__force u32)port, proto, ip_vs_conn_rnd) ^
+                       ((size_t)net>>8)) & ip_vs_conn_tab_mask;
 #endif
-       return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
-                           ip_vs_conn_rnd)
-               & ip_vs_conn_tab_mask;
+       return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+                           ip_vs_conn_rnd) ^
+               ((size_t)net>>8)) & ip_vs_conn_tab_mask;
 }
 
 static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
                port = p->vport;
        }
 
-       return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+       return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
 }
 
 static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 {
        struct ip_vs_conn_param p;
 
-       ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
-                             NULL, 0, &p);
+       ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+                             &cp->caddr, cp->cport, NULL, 0, &p);
 
-       if (cp->dest && cp->dest->svc->pe) {
-               p.pe = cp->dest->svc->pe;
+       if (cp->pe) {
+               p.pe = cp->pe;
                p.pe_data = cp->pe_data;
                p.pe_data_len = cp->pe_data_len;
        }
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 }
 
 /*
- *     Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *     Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
  *     returns bool success.
  */
 static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                if (cp->af == p->af &&
+                   p->cport == cp->cport && p->vport == cp->vport &&
                    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
                    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
-                   p->cport == cp->cport && p->vport == cp->vport &&
                    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-                   p->protocol == cp->protocol) {
+                   p->protocol == cp->protocol &&
+                   ip_vs_conn_net_eq(cp, p->net)) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        ct_read_unlock(hash);
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
                            struct ip_vs_conn_param *p)
 {
        __be16 _ports[2], *pptr;
+       struct net *net = skb_net(skb);
 
        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
        if (pptr == NULL)
                return 1;
 
        if (likely(!inverse))
-               ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
-                                     &iph->daddr, pptr[1], p);
+               ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+                                     pptr[0], &iph->daddr, pptr[1], p);
        else
-               ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
-                                     &iph->saddr, pptr[0], p);
+               ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+                                     pptr[1], &iph->saddr, pptr[0], p);
        return 0;
 }
 
 struct ip_vs_conn *
 ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-                       struct ip_vs_protocol *pp,
                        const struct ip_vs_iphdr *iph,
                        unsigned int proto_off, int inverse)
 {
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+               if (!ip_vs_conn_net_eq(cp, p->net))
+                       continue;
                if (p->pe_data && p->pe->ct_match) {
-                       if (p->pe->ct_match(p, cp))
+                       if (p->pe == cp->pe && p->pe->ct_match(p, cp))
                                goto out;
                        continue;
                }
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                if (cp->af == p->af &&
+                   p->vport == cp->cport && p->cport == cp->dport &&
                    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
                    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
-                   p->vport == cp->cport && p->cport == cp->dport &&
-                   p->protocol == cp->protocol) {
+                   p->protocol == cp->protocol &&
+                   ip_vs_conn_net_eq(cp, p->net)) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        ret = cp;
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 struct ip_vs_conn *
 ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-                        struct ip_vs_protocol *pp,
                         const struct ip_vs_iphdr *iph,
                         unsigned int proto_off, int inverse)
 {
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
        struct ip_vs_dest *dest;
 
        if ((cp) && (!cp->dest)) {
-               dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
-                                      &cp->vaddr, cp->vport,
-                                      cp->protocol);
+               dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+                                      cp->dport, &cp->vaddr, cp->vport,
+                                      cp->protocol, cp->fwmark);
                ip_vs_bind_dest(cp, dest);
                return dest;
        } else
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 int ip_vs_check_template(struct ip_vs_conn *ct)
 {
        struct ip_vs_dest *dest = ct->dest;
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
 
        /*
         * Checking the dest server status.
         */
        if ((dest == NULL) ||
            !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-           (sysctl_ip_vs_expire_quiescent_template &&
+           (ipvs->sysctl_expire_quiescent_template &&
             (atomic_read(&dest->weight) == 0))) {
                IP_VS_DBG_BUF(9, "check_template: dest not available for "
                              "protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 static void ip_vs_conn_expire(unsigned long data)
 {
        struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 
        cp->timeout = 60*HZ;
 
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data)
                if (cp->flags & IP_VS_CONN_F_NFCT)
                        ip_vs_conn_drop_conntrack(cp);
 
+               ip_vs_pe_put(cp->pe);
                kfree(cp->pe_data);
                if (unlikely(cp->app != NULL))
                        ip_vs_unbind_app(cp);
                ip_vs_unbind_dest(cp);
                if (cp->flags & IP_VS_CONN_F_NO_CPORT)
                        atomic_dec(&ip_vs_conn_no_cport_cnt);
-               atomic_dec(&ip_vs_conn_count);
+               atomic_dec(&ipvs->conn_count);
 
                kmem_cache_free(ip_vs_conn_cachep, cp);
                return;
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 struct ip_vs_conn *
 ip_vs_conn_new(const struct ip_vs_conn_param *p,
               const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-              struct ip_vs_dest *dest)
+              struct ip_vs_dest *dest, __u32 fwmark)
 {
        struct ip_vs_conn *cp;
-       struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+       struct netns_ipvs *ipvs = net_ipvs(p->net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+                                                          p->protocol);
 
        cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
        if (cp == NULL) {
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 
        INIT_LIST_HEAD(&cp->c_list);
        setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+       ip_vs_conn_net_set(cp, p->net);
        cp->af             = p->af;
        cp->protocol       = p->protocol;
        ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
                        &cp->daddr, daddr);
        cp->dport          = dport;
        cp->flags          = flags;
-       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+       cp->fwmark         = fwmark;
+       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+               ip_vs_pe_get(p->pe);
+               cp->pe = p->pe;
                cp->pe_data = p->pe_data;
                cp->pe_data_len = p->pe_data_len;
        }
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
        atomic_set(&cp->n_control, 0);
        atomic_set(&cp->in_pkts, 0);
 
-       atomic_inc(&ip_vs_conn_count);
+       atomic_inc(&ipvs->conn_count);
        if (flags & IP_VS_CONN_F_NO_CPORT)
                atomic_inc(&ip_vs_conn_no_cport_cnt);
 
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 #endif
                ip_vs_bind_xmit(cp);
 
-       if (unlikely(pp && atomic_read(&pp->appcnt)))
-               ip_vs_bind_app(cp, pp);
+       if (unlikely(pd && atomic_read(&pd->appcnt)))
+               ip_vs_bind_app(cp, pd->pp);
 
        /*
         * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
         * IP_VS_CONN_F_ONE_PACKET too.
         */
 
-       if (ip_vs_conntrack_enabled())
+       if (ip_vs_conntrack_enabled(ipvs))
                cp->flags |= IP_VS_CONN_F_NFCT;
 
        /* Hash it in the ip_vs_conn_tab finally */
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  *     /proc/net/ip_vs_conn entries
  */
 #ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+       struct seq_net_private p;
+       struct list_head *l;
+};
 
 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 {
        int idx;
        struct ip_vs_conn *cp;
+       struct ip_vs_iter_state *iter = seq->private;
 
        for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
                ct_read_lock_bh(idx);
                list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
                        if (pos-- == 0) {
-                               seq->private = &ip_vs_conn_tab[idx];
+                               iter->l = &ip_vs_conn_tab[idx];
                        return cp;
                        }
                }
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 
 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       seq->private = NULL;
+       struct ip_vs_iter_state *iter = seq->private;
+
+       iter->l = NULL;
        return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
 
 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct ip_vs_conn *cp = v;
-       struct list_head *e, *l = seq->private;
+       struct ip_vs_iter_state *iter = seq->private;
+       struct list_head *e, *l = iter->l;
        int idx;
 
        ++*pos;
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        while (++idx < ip_vs_conn_tab_size) {
                ct_read_lock_bh(idx);
                list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-                       seq->private = &ip_vs_conn_tab[idx];
+                       iter->l = &ip_vs_conn_tab[idx];
                        return cp;
                }
                ct_read_unlock_bh(idx);
        }
-       seq->private = NULL;
+       iter->l = NULL;
        return NULL;
 }
 
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
 {
-       struct list_head *l = seq->private;
+       struct ip_vs_iter_state *iter = seq->private;
+       struct list_head *l = iter->l;
 
        if (l)
                ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires PEName PEData\n");
        else {
                const struct ip_vs_conn *cp = v;
+               struct net *net = seq_file_net(seq);
                char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
                size_t len = 0;
 
-               if (cp->dest && cp->pe_data &&
-                   cp->dest->svc->pe->show_pe_data) {
+               if (!ip_vs_conn_net_eq(cp, net))
+                       return 0;
+               if (cp->pe_data) {
                        pe_data[0] = ' ';
-                       len = strlen(cp->dest->svc->pe->name);
-                       memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+                       len = strlen(cp->pe->name);
+                       memcpy(pe_data + 1, cp->pe->name, len);
                        pe_data[len + 1] = ' ';
                        len += 2;
-                       len += cp->dest->svc->pe->show_pe_data(cp,
-                                                              pe_data + len);
+                       len += cp->pe->show_pe_data(cp, pe_data + len);
                }
                pe_data[len] = '\0';
 
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
 
 static int ip_vs_conn_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &ip_vs_conn_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+                           sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
        else {
                const struct ip_vs_conn *cp = v;
+               struct net *net = seq_file_net(seq);
+
+               if (!ip_vs_conn_net_eq(cp, net))
+                       return 0;
 
 #ifdef CONFIG_IP_VS_IPV6
                if (cp->af == AF_INET6)
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 
 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &ip_vs_conn_sync_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+                           sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 }
 
 /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
 {
        int idx;
        struct ip_vs_conn *cp;
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void)
                        if (cp->flags & IP_VS_CONN_F_TEMPLATE)
                                /* connection template */
                                continue;
-
+                       if (!ip_vs_conn_net_eq(cp, net))
+                               continue;
                        if (cp->protocol == IPPROTO_TCP) {
                                switch(cp->state) {
                                case IP_VS_TCP_S_SYN_RECV:
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void)
 /*
  *      Flush all the connection entries in the ip_vs_conn_tab
  */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
 {
        int idx;
        struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-  flush_again:
+flush_again:
        for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
                /*
                 *  Lock is actually needed in this loop.
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void)
                ct_write_lock_bh(idx);
 
                list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+                       if (!ip_vs_conn_net_eq(cp, net))
+                               continue;
                        IP_VS_DBG(4, "del connection\n");
                        ip_vs_conn_expire_now(cp);
                        if (cp->control) {
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void)
 
        /* the counter may be not NULL, because maybe some conn entries
           are run by slow timer handler or unhashed but still referred */
-       if (atomic_read(&ip_vs_conn_count) != 0) {
+       if (atomic_read(&ipvs->conn_count) != 0) {
                schedule();
                goto flush_again;
        }
 }
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       atomic_set(&ipvs->conn_count, 0);
+
+       proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+       proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+       return 0;
+}
 
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+       /* flush all the connection entries first */
+       ip_vs_conn_flush(net);
+       proc_net_remove(net, "ip_vs_conn");
+       proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+       .init = __ip_vs_conn_init,
+       .exit = __ip_vs_conn_cleanup,
+};
 
 int __init ip_vs_conn_init(void)
 {
        int idx;
+       int retc;
 
        /* Compute size and mask */
        ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void)
                rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
        }
 
-       proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-       proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+       retc = register_pernet_subsys(&ipvs_conn_ops);
 
        /* calculate the random value for connection hash */
        get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
 
-       return 0;
+       return retc;
 }
 
-
 void ip_vs_conn_cleanup(void)
 {
-       /* flush all the connection entries first */
-       ip_vs_conn_flush();
-
+       unregister_pernet_subsys(&ipvs_conn_ops);
        /* Release the empty cache */
        kmem_cache_destroy(ip_vs_conn_cachep);
-       proc_net_remove(&init_net, "ip_vs_conn");
-       proc_net_remove(&init_net, "ip_vs_conn_sync");
        vfree(ip_vs_conn_tab);
 }
index b4e51e9..f36a84f 100644 (file)
@@ -41,6 +41,7 @@
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>
 #include <net/ip6_checksum.h>
+#include <net/netns/generic.h>         /* net_generic() */
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns counter, used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
        struct ip_vs_dest *dest = cp->dest;
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               spin_lock(&dest->stats.lock);
-               dest->stats.ustats.inpkts++;
-               dest->stats.ustats.inbytes += skb->len;
-               spin_unlock(&dest->stats.lock);
-
-               spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.ustats.inpkts++;
-               dest->svc->stats.ustats.inbytes += skb->len;
-               spin_unlock(&dest->svc->stats.lock);
-
-               spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.ustats.inpkts++;
-               ip_vs_stats.ustats.inbytes += skb->len;
-               spin_unlock(&ip_vs_stats.lock);
+               struct ip_vs_cpu_stats *s;
+
+               s = this_cpu_ptr(dest->stats.cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(ipvs->cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
        }
 }
 
@@ -131,21 +145,28 @@ static inline void
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
        struct ip_vs_dest *dest = cp->dest;
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               spin_lock(&dest->stats.lock);
-               dest->stats.ustats.outpkts++;
-               dest->stats.ustats.outbytes += skb->len;
-               spin_unlock(&dest->stats.lock);
-
-               spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.ustats.outpkts++;
-               dest->svc->stats.ustats.outbytes += skb->len;
-               spin_unlock(&dest->svc->stats.lock);
-
-               spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.ustats.outpkts++;
-               ip_vs_stats.ustats.outbytes += skb->len;
-               spin_unlock(&ip_vs_stats.lock);
+               struct ip_vs_cpu_stats *s;
+
+               s = this_cpu_ptr(dest->stats.cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(ipvs->cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
        }
 }
 
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
-       spin_lock(&cp->dest->stats.lock);
-       cp->dest->stats.ustats.conns++;
-       spin_unlock(&cp->dest->stats.lock);
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
+       struct ip_vs_cpu_stats *s;
+
+       s = this_cpu_ptr(cp->dest->stats.cpustats);
+       s->ustats.conns++;
 
-       spin_lock(&svc->stats.lock);
-       svc->stats.ustats.conns++;
-       spin_unlock(&svc->stats.lock);
+       s = this_cpu_ptr(svc->stats.cpustats);
+       s->ustats.conns++;
 
-       spin_lock(&ip_vs_stats.lock);
-       ip_vs_stats.ustats.conns++;
-       spin_unlock(&ip_vs_stats.lock);
+       s = this_cpu_ptr(ipvs->cpustats);
+       s->ustats.conns++;
 }
 
 
 static inline int
 ip_vs_set_state(struct ip_vs_conn *cp, int direction,
                const struct sk_buff *skb,
-               struct ip_vs_protocol *pp)
+               struct ip_vs_proto_data *pd)
 {
-       if (unlikely(!pp->state_transition))
+       if (unlikely(!pd->pp->state_transition))
                return 0;
-       return pp->state_transition(cp, direction, skb, pp);
+       return pd->pp->state_transition(cp, direction, skb, pd);
 }
 
-static inline void
+static inline int
 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
                              struct sk_buff *skb, int protocol,
                              const union nf_inet_addr *caddr, __be16 cport,
                              const union nf_inet_addr *vaddr, __be16 vport,
                              struct ip_vs_conn_param *p)
 {
-       ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+       ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+                             vport, p);
        p->pe = svc->pe;
        if (p->pe && p->pe->fill_param)
-               p->pe->fill_param(p, skb);
+               return p->pe->fill_param(p, skb);
+
+       return 0;
 }
 
 /*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
                    struct sk_buff *skb,
-                   __be16 ports[2])
+                   __be16 src_port, __be16 dst_port, int *ignored)
 {
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 
        IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
                      "mnet %s\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
                      IP_VS_DBG_ADDR(svc->af, &snet));
 
        /*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
                __be16 vport = 0;
 
-               if (ports[1] == svc->port) {
+               if (dst_port == svc->port) {
                        /* non-FTP template:
                         * <protocol, caddr, 0, vaddr, vport, daddr, dport>
                         * FTP template:
                         * <protocol, caddr, 0, vaddr, 0, daddr, 0>
                         */
                        if (svc->port != FTPPORT)
-                               vport = ports[1];
+                               vport = dst_port;
                } else {
                        /* Note: persistent fwmark-based services and
                         * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                vaddr = &fwmark;
                        }
                }
-               ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-                                             vaddr, vport, &param);
+               /* return *ignored = -1 so NF_DROP can be used */
+               if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+                                                 vaddr, vport, &param) < 0) {
+                       *ignored = -1;
+                       return NULL;
+               }
        }
 
        /* Check if a template already exists */
        ct = ip_vs_ct_in_get(&param);
        if (!ct || !ip_vs_check_template(ct)) {
-               /* No template found or the dest of the connection
+               /*
+                * No template found or the dest of the connection
                 * template is not available.
+                * return *ignored = 0, i.e. ICMP and then NF_DROP
                 */
                dest = svc->scheduler->schedule(svc, skb);
                if (!dest) {
                        IP_VS_DBG(1, "p-schedule: no dest found.\n");
                        kfree(param.pe_data);
+                       *ignored = 0;
                        return NULL;
                }
 
-               if (ports[1] == svc->port && svc->port != FTPPORT)
+               if (dst_port == svc->port && svc->port != FTPPORT)
                        dport = dest->port;
 
                /* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * and thus param.pe_data will be destroyed
                 * when the template expires */
                ct = ip_vs_conn_new(&param, &dest->addr, dport,
-                                   IP_VS_CONN_F_TEMPLATE, dest);
+                                   IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
                if (ct == NULL) {
                        kfree(param.pe_data);
+                       *ignored = -1;
                        return NULL;
                }
 
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                kfree(param.pe_data);
        }
 
-       dport = ports[1];
+       dport = dst_port;
        if (dport == svc->port && dest->port)
                dport = dest->port;
 
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        /*
         *    Create a new connection according to the template
         */
-       ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-                             &iph.daddr, ports[1], &param);
-       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+       ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+                             src_port, &iph.daddr, dst_port, &param);
+
+       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
        if (cp == NULL) {
                ip_vs_conn_put(ct);
+               *ignored = -1;
                return NULL;
        }
 
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
  *  It selects a server according to the virtual service, and
  *  creates a connection entry.
  *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler cannot find a destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but fatal error occurred, eg.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
  */
 struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-              struct ip_vs_protocol *pp, int *ignored)
+              struct ip_vs_proto_data *pd, int *ignored)
 {
+       struct ip_vs_protocol *pp = pd->pp;
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        }
 
        /*
-        * Do not schedule replies from local real server. It is risky
-        * for fwmark services but mostly for persistent services.
+        *    Do not schedule replies from local real server.
         */
        if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-           (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
-           (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+           (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
                IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
                              "Not scheduling reply for existing connection");
                __ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        /*
         *    Persistent service
         */
-       if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-               *ignored = 0;
-               return ip_vs_sched_persist(svc, skb, pptr);
-       }
+       if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+               return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+       *ignored = 0;
 
        /*
         *    Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                return NULL;
        }
 
-       *ignored = 0;
-
        dest = svc->scheduler->schedule(svc, skb);
        if (dest == NULL) {
                IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
         */
        {
                struct ip_vs_conn_param p;
-               ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
-                                     pptr[0], &iph.daddr, pptr[1], &p);
+
+               ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+                                     &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+                                     &p);
                cp = ip_vs_conn_new(&p, &dest->addr,
                                    dest->port ? dest->port : pptr[1],
-                                   flags, dest);
-               if (!cp)
+                                   flags, dest, skb->mark);
+               if (!cp) {
+                       *ignored = -1;
                        return NULL;
+               }
        }
 
        IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
  *  no destination is available for a new connection.
  */
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-               struct ip_vs_protocol *pp)
+               struct ip_vs_proto_data *pd)
 {
+       struct net *net;
+       struct netns_ipvs *ipvs;
        __be16 _ports[2], *pptr;
        struct ip_vs_iphdr iph;
        int unicast;
+
        ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
        pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                ip_vs_service_put(svc);
                return NF_DROP;
        }
+       net = skb_net(skb);
 
 #ifdef CONFIG_IP_VS_IPV6
        if (svc->af == AF_INET6)
                unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
        else
 #endif
-               unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+               unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
 
        /* if it is fwmark-based service, the cache_bypass sysctl is up
           and the destination is a non-local unicast, then create
           a cache_bypass connection entry */
-       if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+       ipvs = net_ipvs(net);
+       if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
                int ret, cs;
                struct ip_vs_conn *cp;
                unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
                {
                        struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(svc->af, iph.protocol,
+                       ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
                                              &iph.saddr, pptr[0],
                                              &iph.daddr, pptr[1], &p);
                        cp = ip_vs_conn_new(&p, &daddr, 0,
                                            IP_VS_CONN_F_BYPASS | flags,
-                                           NULL);
+                                           NULL, skb->mark);
                        if (!cp)
                                return NF_DROP;
                }
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                ip_vs_in_stats(cp, skb);
 
                /* set state */
-               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 
                /* transmit the first SYN packet */
-               ret = cp->packet_xmit(skb, cp, pp);
+               ret = cp->packet_xmit(skb, cp, pd->pp);
                /* do not touch skb anymore */
 
                atomic_inc(&cp->in_pkts);
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
                                struct ip_vs_protocol *pp,
                                unsigned int offset, unsigned int ihl)
 {
+       struct netns_ipvs *ipvs;
        unsigned int verdict = NF_DROP;
 
        if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
        if (!skb_make_writable(skb, offset))
                goto out;
 
+       ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6)
                ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
-               if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+               if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
                        goto out;
        } else
 #endif
-               if ((sysctl_ip_vs_snat_reroute ||
+               if ((ipvs->sysctl_snat_reroute ||
                     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
                    ip_route_me_harder(skb, RTN_LOCAL) != 0)
                        goto out;
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
        if (!cp)
                return NF_ACCEPT;
 
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 
        ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
        if (!cp)
                return NF_ACCEPT;
 
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
  * Used for NAT and local client.
  */
 static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                struct ip_vs_conn *cp, int ihl)
 {
+       struct ip_vs_protocol *pp = pd->pp;
+       struct netns_ipvs *ipvs;
+
        IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
 
        if (!skb_make_writable(skb, ihl))
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
         * if it came from this machine itself.  So re-compute
         * the routing information.
         */
+       ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
-               if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+               if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
                        goto drop;
        } else
 #endif
-               if ((sysctl_ip_vs_snat_reroute ||
+               if ((ipvs->sysctl_snat_reroute ||
                     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
                    ip_route_me_harder(skb, RTN_LOCAL) != 0)
                        goto drop;
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
        IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
 
        ip_vs_out_stats(cp, skb);
-       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
        skb->ipvs_property = 1;
        if (!(cp->flags & IP_VS_CONN_F_NFCT))
                ip_vs_notrack(skb);
@@ -999,9 +1062,12 @@ drop:
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+       struct net *net = NULL;
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs;
 
        EnterFunction(11);
 
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
        if (unlikely(!skb_dst(skb)))
                return NF_ACCEPT;
 
+       net = skb_net(skb);
        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
                        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
                }
 
-       pp = ip_vs_proto_get(iph.protocol);
-       if (unlikely(!pp))
+       pd = ip_vs_proto_data_get(net, iph.protocol);
+       if (unlikely(!pd))
                return NF_ACCEPT;
+       pp = pd->pp;
 
        /* reassemble IP fragments */
 #ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
        /*
         * Check if the packet belongs to an existing entry
         */
-       cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+       cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+       ipvs = net_ipvs(net);
 
        if (likely(cp))
-               return handle_response(af, skb, pp, cp, iph.len);
-       if (sysctl_ip_vs_nat_icmp_send &&
+               return handle_response(af, skb, pd, cp, iph.len);
+       if (ipvs->sysctl_nat_icmp_send &&
            (pp->protocol == IPPROTO_TCP ||
             pp->protocol == IPPROTO_UDP ||
             pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
                                          sizeof(_ports), _ports);
                if (pptr == NULL)
                        return NF_ACCEPT;       /* Not for me */
-               if (ip_vs_lookup_real_service(af, iph.protocol,
+               if (ip_vs_lookup_real_service(net, af, iph.protocol,
                                              &iph.saddr,
                                              pptr[0])) {
                        /*
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 static int
 ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+       struct net *net = NULL;
        struct iphdr *iph;
        struct icmphdr  _icmph, *ic;
        struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
        struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        unsigned int offset, ihl, verdict;
        union nf_inet_addr snet;
 
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        if (cih == NULL)
                return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-       pp = ip_vs_proto_get(cih->protocol);
-       if (!pp)
+       net = skb_net(skb);
+       pd = ip_vs_proto_data_get(net, cih->protocol);
+       if (!pd)
                return NF_ACCEPT;
+       pp = pd->pp;
 
        /* Is the embedded protocol header present? */
        if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
        if (!cp) {
                /* The packet could also belong to a local client */
-               cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+               cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
                if (cp) {
                        snet.ip = iph->saddr;
                        return handle_response_icmp(AF_INET, skb, &snet,
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 static int
 ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+       struct net *net = NULL;
        struct ipv6hdr *iph;
        struct icmp6hdr _icmph, *ic;
        struct ipv6hdr  _ciph, *cih;    /* The ip header contained
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
        struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        unsigned int offset, verdict;
        union nf_inet_addr snet;
        struct rt6_info *rt;
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
        if (cih == NULL)
                return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-       pp = ip_vs_proto_get(cih->nexthdr);
-       if (!pp)
+       net = skb_net(skb);
+       pd = ip_vs_proto_data_get(net, cih->nexthdr);
+       if (!pd)
                return NF_ACCEPT;
+       pp = pd->pp;
 
        /* Is the embedded protocol header present? */
        /* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
        if (!cp) {
                /* The packet could also belong to a local client */
-               cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+               cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
                if (cp) {
                        ipv6_addr_copy(&snet.in6, &iph->saddr);
                        return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 static unsigned int
 ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+       struct net *net;
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
        int ret, restart, pkts;
+       struct netns_ipvs *ipvs;
 
        /* Already marked as IPVS request or reply? */
        if (skb->ipvs_property)
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
                        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
                }
 
+       net = skb_net(skb);
        /* Protocol supported? */
-       pp = ip_vs_proto_get(iph.protocol);
-       if (unlikely(!pp))
+       pd = ip_vs_proto_data_get(net, iph.protocol);
+       if (unlikely(!pd))
                return NF_ACCEPT;
-
+       pp = pd->pp;
        /*
         * Check if the packet belongs to an existing connection entry
         */
-       cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+       cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
 
        if (unlikely(!cp)) {
                int v;
 
-               if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+               if (!pp->conn_schedule(af, skb, pd, &v, &cp))
                        return v;
        }
 
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        }
 
        IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+       net = skb_net(skb);
+       ipvs = net_ipvs(net);
        /* Check the server status */
        if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                /* the destination server is not available */
 
-               if (sysctl_ip_vs_expire_nodest_conn) {
+               if (ipvs->sysctl_expire_nodest_conn) {
                        /* try to expire the connection immediately */
                        ip_vs_conn_expire_now(cp);
                }
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        }
 
        ip_vs_in_stats(cp, skb);
-       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
        if (cp->packet_xmit)
                ret = cp->packet_xmit(skb, cp, pp);
                /* do not touch skb anymore */
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
         *
         * Sync connection if it is about to close to
         * encorage the standby servers to update the connections timeout
+        *
+        * For ONE_PKT let ip_vs_sync_conn() do the filter work.
         */
-       pkts = atomic_add_return(1, &cp->in_pkts);
-       if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               pkts = ipvs->sysctl_sync_threshold[0];
+       else
+               pkts = atomic_add_return(1, &cp->in_pkts);
+
+       if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
            cp->protocol == IPPROTO_SCTP) {
                if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-                       (pkts % sysctl_ip_vs_sync_threshold[1]
-                        == sysctl_ip_vs_sync_threshold[0])) ||
+                       (pkts % ipvs->sysctl_sync_threshold[1]
+                        == ipvs->sysctl_sync_threshold[0])) ||
                                (cp->old_state != cp->state &&
                                 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
                                  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
                                  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-                       ip_vs_sync_conn(cp);
+                       ip_vs_sync_conn(net, cp);
                        goto out;
                }
        }
 
        /* Keep this block last: TCP and others with pp->num_states <= 1 */
-       else if (af == AF_INET &&
-           (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+       else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
            (((cp->protocol != IPPROTO_TCP ||
               cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-             (pkts % sysctl_ip_vs_sync_threshold[1]
-              == sysctl_ip_vs_sync_threshold[0])) ||
+             (pkts % ipvs->sysctl_sync_threshold[1]
+              == ipvs->sysctl_sync_threshold[0])) ||
             ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
              ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
               (cp->state == IP_VS_TCP_S_CLOSE) ||
               (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
               (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-               ip_vs_sync_conn(cp);
+               ip_vs_sync_conn(net, cp);
 out:
        cp->old_state = cp->state;
 
@@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        },
 #endif
 };
+/*
+ *     Per-netns init: bind the netns_ipvs area from net_generic() to
+ *     @net and give it an id.  Returns 0, or -ENOMEM if there is none.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_generic(net, ip_vs_net_id);
+
+       if (ipvs == NULL) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       ipvs->net = net;
+       /* Counters used for creating unique names */
+       ipvs->gen = atomic_read(&ipvs_netns_cnt);
+       atomic_inc(&ipvs_netns_cnt);
+       net->ipvs = ipvs;
+       printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
+                        sizeof(struct netns_ipvs), ipvs->gen);
+       return 0;
+}
+/* Per-netns exit hook (ipvs_core_ops.exit): only logs the released id. */
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
+       IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);
+}
+/* Pernet ops for IPVS core; .size/.id describe the per-netns netns_ipvs. */
+static struct pernet_operations ipvs_core_ops = {
+       .init = __ip_vs_init,
+       .exit = __ip_vs_cleanup,
+       .id   = &ip_vs_net_id,
+       .size = sizeof(struct netns_ipvs),
+};
 
 /*
  *     Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
 {
        int ret;
 
-       ip_vs_estimator_init();
+       ret = register_pernet_subsys(&ipvs_core_ops);   /* Alloc ip_vs struct */
+       if (ret < 0)
+               return ret;
 
+       ip_vs_estimator_init();
        ret = ip_vs_control_init();
        if (ret < 0) {
                pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
                goto cleanup_app;
        }
 
+       ret = ip_vs_sync_init();
+       if (ret < 0) {
+               pr_err("can't setup sync data.\n");
+               goto cleanup_conn;
+       }
+
        ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
        if (ret < 0) {
                pr_err("can't register hooks.\n");
-               goto cleanup_conn;
+               goto cleanup_sync;
        }
 
        pr_info("ipvs loaded.\n");
        return ret;
 
+cleanup_sync:
+       ip_vs_sync_cleanup();
   cleanup_conn:
        ip_vs_conn_cleanup();
   cleanup_app:
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
        ip_vs_control_cleanup();
   cleanup_estimator:
        ip_vs_estimator_cleanup();
+       unregister_pernet_subsys(&ipvs_core_ops);       /* free ip_vs struct */
        return ret;
 }
 
 static void __exit ip_vs_cleanup(void)
 {
        nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+       ip_vs_sync_cleanup();           /* pairs with ip_vs_sync_init() */
        ip_vs_conn_cleanup();
        ip_vs_app_cleanup();
        ip_vs_protocol_cleanup();
        ip_vs_control_cleanup();
        ip_vs_estimator_cleanup();
+       unregister_pernet_subsys(&ipvs_core_ops);       /* free ip_vs struct */
        pr_info("ipvs unloaded.\n");
 }
 
index 22f7ad5..09ca2ce 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>
 
 #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
 #include <net/ip.h>
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
 /* lock for service table */
 static DEFINE_RWLOCK(__ip_vs_svc_lock);
 
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
 /* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void)
 
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+                                   const struct in6_addr *addr)
 {
        struct rt6_info *rt;
        struct flowi fl = {
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
                .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
        };
 
-       rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
                        return 1;
 
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
  *     update_defense_level is called from keventd and from sysctl,
  *     so it needs to protect itself from softirqs
  */
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
 {
        struct sysinfo i;
        static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
        /* si_swapinfo(&i); */
        /* availmem = availmem - (i.totalswap - i.freeswap); */
 
-       nomem = (availmem < sysctl_ip_vs_amemthresh);
+       nomem = (availmem < ipvs->sysctl_amemthresh);
 
        local_bh_disable();
 
        /* drop_entry */
-       spin_lock(&__ip_vs_dropentry_lock);
-       switch (sysctl_ip_vs_drop_entry) {
+       spin_lock(&ipvs->dropentry_lock);
+       switch (ipvs->sysctl_drop_entry) {
        case 0:
-               atomic_set(&ip_vs_dropentry, 0);
+               atomic_set(&ipvs->dropentry, 0);
                break;
        case 1:
                if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
-                       sysctl_ip_vs_drop_entry = 2;
+                       atomic_set(&ipvs->dropentry, 1);
+                       ipvs->sysctl_drop_entry = 2;
                } else {
-                       atomic_set(&ip_vs_dropentry, 0);
+                       atomic_set(&ipvs->dropentry, 0);
                }
                break;
        case 2:
                if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
+                       atomic_set(&ipvs->dropentry, 1);
                } else {
-                       atomic_set(&ip_vs_dropentry, 0);
-                       sysctl_ip_vs_drop_entry = 1;
+                       atomic_set(&ipvs->dropentry, 0);
+                       ipvs->sysctl_drop_entry = 1;
                };
                break;
        case 3:
-               atomic_set(&ip_vs_dropentry, 1);
+               atomic_set(&ipvs->dropentry, 1);
                break;
        }
-       spin_unlock(&__ip_vs_dropentry_lock);
+       spin_unlock(&ipvs->dropentry_lock);
 
        /* drop_packet */
-       spin_lock(&__ip_vs_droppacket_lock);
-       switch (sysctl_ip_vs_drop_packet) {
+       spin_lock(&ipvs->droppacket_lock);
+       switch (ipvs->sysctl_drop_packet) {
        case 0:
-               ip_vs_drop_rate = 0;
+               ipvs->drop_rate = 0;
                break;
        case 1:
                if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
-                       sysctl_ip_vs_drop_packet = 2;
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
+                       ipvs->sysctl_drop_packet = 2;
                } else {
-                       ip_vs_drop_rate = 0;
+                       ipvs->drop_rate = 0;
                }
                break;
        case 2:
                if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
                } else {
-                       ip_vs_drop_rate = 0;
-                       sysctl_ip_vs_drop_packet = 1;
+                       ipvs->drop_rate = 0;
+                       ipvs->sysctl_drop_packet = 1;
                }
                break;
        case 3:
-               ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+               ipvs->drop_rate = ipvs->sysctl_am_droprate;
                break;
        }
-       spin_unlock(&__ip_vs_droppacket_lock);
+       spin_unlock(&ipvs->droppacket_lock);
 
        /* secure_tcp */
-       spin_lock(&ip_vs_securetcp_lock);
-       switch (sysctl_ip_vs_secure_tcp) {
+       spin_lock(&ipvs->securetcp_lock);
+       switch (ipvs->sysctl_secure_tcp) {
        case 0:
                if (old_secure_tcp >= 2)
                        to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
                if (nomem) {
                        if (old_secure_tcp < 2)
                                to_change = 1;
-                       sysctl_ip_vs_secure_tcp = 2;
+                       ipvs->sysctl_secure_tcp = 2;
                } else {
                        if (old_secure_tcp >= 2)
                                to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
                } else {
                        if (old_secure_tcp >= 2)
                                to_change = 0;
-                       sysctl_ip_vs_secure_tcp = 1;
+                       ipvs->sysctl_secure_tcp = 1;
                }
                break;
        case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
                        to_change = 1;
                break;
        }
-       old_secure_tcp = sysctl_ip_vs_secure_tcp;
+       old_secure_tcp = ipvs->sysctl_secure_tcp;
        if (to_change >= 0)
-               ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-       spin_unlock(&ip_vs_securetcp_lock);
+               ip_vs_protocol_timeout_change(ipvs,
+                                             ipvs->sysctl_secure_tcp > 1);
+       spin_unlock(&ipvs->securetcp_lock);
 
        local_bh_enable();
 }
@@ -250,16 +218,16 @@ static void update_defense_level(void)
  *     Timer for checking the defense
  */
 #define DEFENSE_TIMER_PERIOD   1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 
 static void defense_work_handler(struct work_struct *work)
 {
-       update_defense_level();
-       if (atomic_read(&ip_vs_dropentry))
-               ip_vs_random_dropentry();
+       struct netns_ipvs *ipvs =       /* work is embedded in netns_ipvs */
+               container_of(work, struct netns_ipvs, defense_work.work);
 
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+       update_defense_level(ipvs);     /* refresh this netns' defense state */
+       if (atomic_read(&ipvs->dropentry))
+               ip_vs_random_dropentry(ipvs->net); /* dropentry flag is set */
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
 
 int
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 /* the service table hashed by fwmark */
 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 
-/*
- *     Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *     Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *     FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
 
 /*
  *     Returns hash value for virtual service
  */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-                 __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+                 const union nf_inet_addr *addr, __be16 port)
 {
        register unsigned porth = ntohs(port);
        __be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
+       addr_fold ^= ((size_t)net>>8);
 
        return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
                & IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 /*
  *     Returns hash value of fwmark for virtual service lookup
  */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 {
-       return fwmark & IP_VS_SVC_TAB_MASK;
+       return (fwmark ^ ((size_t)net >> 8)) & IP_VS_SVC_TAB_MASK;
 }
 
 /*
- *     Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *     Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
  *     or in the ip_vs_svc_fwm_table by fwmark.
  *     Should be called with locked tables.
  */
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
        if (svc->fwmark == 0) {
                /*
-                *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+                *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
                 */
-               hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-                                        svc->port);
+               hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+                                        &svc->addr, svc->port);
                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
        } else {
                /*
-                *  Hash it by fwmark in ip_vs_svc_fwm_table
+                *  Hash it by fwmark in svc_fwm_table
                 */
-               hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+               hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
        }
 
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 
 /*
- *     Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *     Unhashes a service from svc_table / svc_fwm_table.
  *     Should be called with locked tables.
  */
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
        }
 
        if (svc->fwmark == 0) {
-               /* Remove it from the ip_vs_svc_table table */
+               /* Remove it from the svc_table table */
                list_del(&svc->s_list);
        } else {
-               /* Remove it from the ip_vs_svc_fwm_table table */
+               /* Remove it from the svc_fwm_table table */
                list_del(&svc->f_list);
        }
 
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 
 
 /*
- *     Get service by {proto,addr,port} in the service table.
+ *     Get service by {netns, proto,addr,port} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-                   __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+                    const union nf_inet_addr *vaddr, __be16 vport)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for "full" addressed entries */
-       hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+       hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 
        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
                if ((svc->af == af)
                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
                    && (svc->port == vport)
-                   && (svc->protocol == protocol)) {
+                   && (svc->protocol == protocol)
+                   && net_eq(svc->net, net)) {
                        /* HIT */
                        return svc;
                }
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
  *     Get service by {fwmark} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for fwmark addressed entries */
-       hash = ip_vs_svc_fwm_hashkey(fwmark);
+       hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 
        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-               if (svc->fwmark == fwmark && svc->af == af) {
+               if (svc->fwmark == fwmark && svc->af == af
+                   && net_eq(svc->net, net)) {
                        /* HIT */
                        return svc;
                }
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                  const union nf_inet_addr *vaddr, __be16 vport)
 {
        struct ip_vs_service *svc;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        read_lock(&__ip_vs_svc_lock);
 
        /*
         *      Check the table hashed by fwmark first
         */
-       if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+       svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+       if (fwmark && svc)
                goto out;
 
        /*
         *      Check the table hashed by <protocol,addr,port>
         *      for "full" addressed entries
         */
-       svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+       svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 
        if (svc == NULL
            && protocol == IPPROTO_TCP
-           && atomic_read(&ip_vs_ftpsvc_counter)
+           && atomic_read(&ipvs->ftpsvc_counter)
            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
                /*
                 * Check if ftp service entry exists, the packet
                 * might belong to FTP data connections.
                 */
-               svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
        }
 
        if (svc == NULL
-           && atomic_read(&ip_vs_nullsvc_counter)) {
+           && atomic_read(&ipvs->nullsvc_counter)) {
                /*
                 * Check if the catch-all port (port zero) exists
                 */
-               svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
        }
 
   out:
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
                              svc->fwmark,
                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
                              ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
 }
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
 }
 
 /*
- *     Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *     Hashes ip_vs_dest in rs_table by <proto,addr,port>.
  *     should be called with locked tables.
  */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
        unsigned hash;
 
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
         */
        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 
-       list_add(&dest->d_list, &ip_vs_rtable[hash]);
+       list_add(&dest->d_list, &ipvs->rs_table[hash]);
 
        return 1;
 }
 
 /*
- *     UNhashes ip_vs_dest from ip_vs_rtable.
+ *     UNhashes ip_vs_dest from rs_table.
  *     should be called with locked tables.
  */
 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 {
        /*
-        * Remove it from the ip_vs_rtable table.
+        * Remove it from the rs_table table.
         */
        if (!list_empty(&dest->d_list)) {
                list_del(&dest->d_list);
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *     Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                          const union nf_inet_addr *daddr,
                          __be16 dport)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        unsigned hash;
        struct ip_vs_dest *dest;
 
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
         */
        hash = ip_vs_rs_hashkey(af, daddr, dport);
 
-       read_lock(&__ip_vs_rs_lock);
-       list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+       read_lock(&ipvs->rs_lock);
+       list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
                if ((dest->af == af)
                    && ip_vs_addr_equal(af, &dest->addr, daddr)
                    && (dest->port == dport)
                    && ((dest->protocol == protocol) ||
                        dest->vfwmark)) {
                        /* HIT */
-                       read_unlock(&__ip_vs_rs_lock);
+                       read_unlock(&ipvs->rs_lock);
                        return dest;
                }
        }
-       read_unlock(&__ip_vs_rs_lock);
+       read_unlock(&ipvs->rs_lock);
 
        return NULL;
 }
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+                                  const union nf_inet_addr *daddr,
                                   __be16 dport,
                                   const union nf_inet_addr *vaddr,
-                                  __be16 vport, __u16 protocol)
+                                  __be16 vport, __u16 protocol, __u32 fwmark)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_service *svc;
 
-       svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+       svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
        if (!svc)
                return NULL;
        dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                     __be16 dport)
 {
        struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        /*
         * Find the destination in trash
         */
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
                              "dest->refcnt=%d\n",
                              dest->vfwmark,
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                        list_del(&dest->n_list);
                        ip_vs_dst_reset(dest);
                        __ip_vs_unbind_svc(dest);
+                       free_percpu(dest->stats.cpustats);
                        kfree(dest);
                }
        }
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  *  are expired, and the refcnt of each destination in the trash must
  *  be 1, so we simply release them here.
  */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
 {
        struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(net); /* trash list is per netns */
 
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                list_del(&dest->n_list);
                ip_vs_dst_reset(dest);
                __ip_vs_unbind_svc(dest);
+               free_percpu(dest->stats.cpustats); /* from ip_vs_new_dest() */
                kfree(dest);
        }
 }
@@ -768,6 +728,7 @@ static void
 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                    struct ip_vs_dest_user_kern *udest, int add)
 {
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
        int conn_flags;
 
        /* set the weight and the flags */
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
        } else {
                /*
-                *    Put the real service in ip_vs_rtable if not present.
+                *    Put the real service in rs_table if not present.
                 *    For now only for NAT!
                 */
-               write_lock_bh(&__ip_vs_rs_lock);
-               ip_vs_rs_hash(dest);
-               write_unlock_bh(&__ip_vs_rs_lock);
+               write_lock_bh(&ipvs->rs_lock);
+               ip_vs_rs_hash(ipvs, dest);
+               write_unlock_bh(&ipvs->rs_lock);
        }
        atomic_set(&dest->conn_flags, conn_flags);
 
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
        spin_unlock(&dest->dst_lock);
 
        if (add)
-               ip_vs_new_estimator(&dest->stats);
+               ip_vs_new_estimator(svc->net, &dest->stats);
 
        write_lock_bh(&__ip_vs_svc_lock);
 
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                atype = ipv6_addr_type(&udest->addr.in6);
                if ((!(atype & IPV6_ADDR_UNICAST) ||
                        atype & IPV6_ADDR_LINKLOCAL) &&
-                       !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+                       !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
                        return -EINVAL;
        } else
 #endif
        {
-               atype = inet_addr_type(&init_net, udest->addr.ip);
+               atype = inet_addr_type(svc->net, udest->addr.ip);
                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
                        return -EINVAL;
        }
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                pr_err("%s(): no memory.\n", __func__);
                return -ENOMEM;
        }
+       dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!dest->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto err_alloc;
+       }
 
        dest->af = svc->af;
        dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 
        LeaveFunction(2);
        return 0;
+
+err_alloc:
+       kfree(dest);
+       return -ENOMEM;
 }
 
 
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *     Delete a destination (must be already unlinked from the service)
  */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
-       ip_vs_kill_estimator(&dest->stats);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_kill_estimator(net, &dest->stats);
 
        /*
         *  Remove it from the d-linked list with the real services.
         */
-       write_lock_bh(&__ip_vs_rs_lock);
+       write_lock_bh(&ipvs->rs_lock);
        ip_vs_rs_unhash(dest);
-       write_unlock_bh(&__ip_vs_rs_lock);
+       write_unlock_bh(&ipvs->rs_lock);
 
        /*
         *  Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                   and only one user context can update virtual service at a
                   time, so the operation here is OK */
                atomic_dec(&dest->svc->refcnt);
+               free_percpu(dest->stats.cpustats);
                kfree(dest);
        } else {
                IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
                              ntohs(dest->port),
                              atomic_read(&dest->refcnt));
-               list_add(&dest->n_list, &ip_vs_dest_trash);
+               list_add(&dest->n_list, &ipvs->dest_trash);
                atomic_inc(&dest->refcnt);
        }
 }
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        /*
         *      Delete the destination
         */
-       __ip_vs_del_dest(dest);
+       __ip_vs_del_dest(svc->net, dest);
 
        LeaveFunction(2);
 
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  *     Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
                  struct ip_vs_service **svc_p)
 {
        int ret = 0;
        struct ip_vs_scheduler *sched = NULL;
        struct ip_vs_pe *pe = NULL;
        struct ip_vs_service *svc = NULL;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        /* increase the module use count */
        ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        }
 
        if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
@@ -1159,6 +1133,12 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                ret = -ENOMEM;
                goto out_err;
        }
+       svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!svc->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               ret = -ENOMEM;  /* set like the svc allocation failure above */
+               goto out_err;
+       }
 
        /* I'm the first user of the service */
        atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        svc->flags = u->flags;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;
+       svc->net = net;
 
        INIT_LIST_HEAD(&svc->destinations);
        rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 
        /* Update the virtual service counters */
        if (svc->port == FTPPORT)
-               atomic_inc(&ip_vs_ftpsvc_counter);
+               atomic_inc(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
-               atomic_inc(&ip_vs_nullsvc_counter);
+               atomic_inc(&ipvs->nullsvc_counter);
 
-       ip_vs_new_estimator(&svc->stats);
+       ip_vs_new_estimator(net, &svc->stats);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
-               ip_vs_num_services++;
+               ipvs->num_services++;
 
        /* Hash the service into the service table */
        write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        *svc_p = svc;
        return 0;
 
+
  out_err:
        if (svc != NULL) {
                ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                        ip_vs_app_inc_put(svc->inc);
                        local_bh_enable();
                }
+               if (svc->stats.cpustats)
+                       free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
        ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
        old_sched = sched;
 
        if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
        struct ip_vs_dest *dest, *nxt;
        struct ip_vs_scheduler *old_sched;
        struct ip_vs_pe *old_pe;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        pr_info("%s: enter\n", __func__);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
-               ip_vs_num_services--;
+               ipvs->num_services--;
 
-       ip_vs_kill_estimator(&svc->stats);
+       ip_vs_kill_estimator(svc->net, &svc->stats);
 
        /* Unbind scheduler */
        old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
         */
        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
                __ip_vs_unlink_dest(svc, dest, 0);
-               __ip_vs_del_dest(dest);
+               __ip_vs_del_dest(svc->net, dest);
        }
 
        /*
         *    Update the virtual service counters
         */
        if (svc->port == FTPPORT)
-               atomic_dec(&ip_vs_ftpsvc_counter);
+               atomic_dec(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
-               atomic_dec(&ip_vs_nullsvc_counter);
+               atomic_dec(&ipvs->nullsvc_counter);
 
        /*
         *    Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
                              svc->fwmark,
                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
                              ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
 
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *     Flush all the virtual services
  */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
 {
        int idx;
        struct ip_vs_service *svc, *nxt;
 
        /*
-        * Flush the service table hashed by <protocol,addr,port>
+        * Flush the service table hashed by <netns,protocol,addr,port>
         */
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_unlink_service(svc);
+               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+                                        s_list) {
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                }
        }
 
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void)
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry_safe(svc, nxt,
                                         &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_unlink_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                }
        }
 
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
        return 0;
 }
 
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
 {
        int idx;
        struct ip_vs_service *svc;
 
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                }
        }
 
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                }
        }
 
-       ip_vs_zero_stats(&ip_vs_stats);
+       ip_vs_zero_stats(net_ipvs(net)->tot_stats);
        return 0;
 }
 
@@ -1498,6 +1488,7 @@ static int
 proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+       struct net *net = current->nsproxy->net_ns;
        int *valp = table->data;
        int val = *valp;
        int rc;
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write,
                        /* Restore the correct value */
                        *valp = val;
                } else {
-                       update_defense_level();
+                       update_defense_level(net_ipvs(net));
                }
        }
        return rc;
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
        return rc;
 }
 
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int *valp = table->data;
+       int val = *valp;
+       int rc;
+
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (write && (*valp != val)) {
+               if ((*valp < 0) || (*valp > 1)) {
+                       /* Restore the correct value */
+                       *valp = val;
+               } else {
+                       struct net *net = current->nsproxy->net_ns;
+                       ip_vs_sync_switch_mode(net, val);
+               }
+       }
+       return rc;
+}
 
 /*
  *     IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *     Do not reorder or insert entries without making the matching
+ *     change to the netns init in __ip_vs_control_init().
  */
 
 static struct ctl_table vs_vars[] = {
        {
                .procname       = "amemthresh",
-               .data           = &sysctl_ip_vs_amemthresh,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-#ifdef CONFIG_IP_VS_DEBUG
-       {
-               .procname       = "debug_level",
-               .data           = &sysctl_ip_vs_debug_level,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-#endif
        {
                .procname       = "am_droprate",
-               .data           = &sysctl_ip_vs_am_droprate,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "drop_entry",
-               .data           = &sysctl_ip_vs_drop_entry,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "drop_packet",
-               .data           = &sysctl_ip_vs_drop_packet,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = {
 #ifdef CONFIG_IP_VS_NFCT
        {
                .procname       = "conntrack",
-               .data           = &sysctl_ip_vs_conntrack,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = {
 #endif
        {
                .procname       = "secure_tcp",
-               .data           = &sysctl_ip_vs_secure_tcp,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "snat_reroute",
-               .data           = &sysctl_ip_vs_snat_reroute,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .procname       = "sync_version",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_do_sync_mode,
+       },
+       {
+               .procname       = "cache_bypass",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_nodest_conn",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_quiescent_template",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sync_threshold",
+               .maxlen         =
+                       sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+               .mode           = 0644,
+               .proc_handler   = proc_do_sync_threshold,
+       },
+       {
+               .procname       = "nat_icmp_send",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#ifdef CONFIG_IP_VS_DEBUG
+       {
+               .procname       = "debug_level",
+               .data           = &sysctl_ip_vs_debug_level,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#endif
 #if 0
        {
                .procname       = "timeout_established",
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = {
                .proc_handler   = proc_dointvec_jiffies,
        },
 #endif
-       {
-               .procname       = "cache_bypass",
-               .data           = &sysctl_ip_vs_cache_bypass,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "expire_nodest_conn",
-               .data           = &sysctl_ip_vs_expire_nodest_conn,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "expire_quiescent_template",
-               .data           = &sysctl_ip_vs_expire_quiescent_template,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "sync_threshold",
-               .data           = &sysctl_ip_vs_sync_threshold,
-               .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
-               .mode           = 0644,
-               .proc_handler   = proc_do_sync_threshold,
-       },
-       {
-               .procname       = "nat_icmp_send",
-               .data           = &sysctl_ip_vs_nat_icmp_send,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        { }
 };
 
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
 
-static struct ctl_table_header * sysctl_header;
-
 #ifdef CONFIG_PROC_FS
 
 struct ip_vs_iter {
+       struct seq_net_private p;  /* Do not move this, netns depends upon it*/
        struct list_head *table;
        int bucket;
 };
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
 /* Get the Nth entry in the two lists */
 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 {
+       struct net *net = seq_file_net(seq);
        struct ip_vs_iter *iter = seq->private;
        int idx;
        struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
        /* look in hash by protocol */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       if (pos-- == 0){
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                iter->table = ip_vs_svc_table;
                                iter->bucket = idx;
                                return svc;
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
        /* keep looking in fwmark */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       if (pos-- == 0) {
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                iter->table = ip_vs_svc_fwm_table;
                                iter->bucket = idx;
                                return svc;
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
 
 static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
-       return seq_open_private(file, &ip_vs_info_seq_ops,
+       return seq_open_net(inode, file, &ip_vs_info_seq_ops,
                        sizeof(struct ip_vs_iter));
 }
 
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = {
 
 #endif
 
-struct ip_vs_stats ip_vs_stats = {
-       .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
        seq_printf(seq,
                   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-       spin_lock_bh(&ip_vs_stats.lock);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-                  ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-                  (unsigned long long) ip_vs_stats.ustats.inbytes,
-                  (unsigned long long) ip_vs_stats.ustats.outbytes);
+       spin_lock_bh(&tot_stats->lock);
+       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
+                  tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
        seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-                       ip_vs_stats.ustats.cps,
-                       ip_vs_stats.ustats.inpps,
-                       ip_vs_stats.ustats.outpps,
-                       ip_vs_stats.ustats.inbps,
-                       ip_vs_stats.ustats.outbps);
-       spin_unlock_bh(&ip_vs_stats.lock);
+                       tot_stats->ustats.cps,
+                       tot_stats->ustats.inpps,
+                       tot_stats->ustats.outpps,
+                       tot_stats->ustats.inbps,
+                       tot_stats->ustats.outbps);
+       spin_unlock_bh(&tot_stats->lock);
 
        return 0;
 }
 
 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-       return single_open(file, ip_vs_stats_show, NULL);
+       return single_open_net(inode, file, ip_vs_stats_show);
 }
 
 static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = {
        .release = single_release,
 };
 
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+       int i;
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                "       Total Incoming Outgoing         Incoming         Outgoing\n");
+       seq_printf(seq,
+                  "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
+
+       for_each_possible_cpu(i) {
+               struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+               seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+                           i, u->ustats.conns, u->ustats.inpkts,
+                           u->ustats.outpkts, (__u64)u->ustats.inbytes,
+                           (__u64)u->ustats.outbytes);
+       }
+
+       spin_lock_bh(&tot_stats->lock);
+       seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
+                  tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+                  tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+       seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
+                       tot_stats->ustats.cps,
+                       tot_stats->ustats.inpps,
+                       tot_stats->ustats.outpps,
+                       tot_stats->ustats.inbps,
+                       tot_stats->ustats.outbps);
+       spin_unlock_bh(&tot_stats->lock);
+
+       return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+       return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+       .owner = THIS_MODULE,
+       .open = ip_vs_stats_percpu_seq_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
 #endif
 
 /*
  *     Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+       struct ip_vs_proto_data *pd;
+
        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
                  u->tcp_timeout,
                  u->tcp_fin_timeout,
@@ -2010,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
        if (u->tcp_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
                        = u->tcp_timeout * HZ;
        }
 
        if (u->tcp_fin_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
                        = u->tcp_fin_timeout * HZ;
        }
 #endif
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
        if (u->udp_timeout) {
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+               pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+               pd->timeout_table[IP_VS_UDP_S_NORMAL]
                        = u->udp_timeout * HZ;
        }
 #endif
@@ -2087,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
+       struct net *net = sock_net(sk);
        int ret;
        unsigned char arg[MAX_ARG_LEN];
        struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
        if (cmd == IP_VS_SO_SET_FLUSH) {
                /* Flush the virtual service */
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
                /* Set timeout values for (tcp tcpfin udp) */
-               ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+               ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+               ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+                                       dm->syncid);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = stop_sync_thread(dm->state);
+               ret = stop_sync_thread(net, dm->state);
                goto out_unlock;
        }
 
@@ -2148,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
        if (cmd == IP_VS_SO_SET_ZERO) {
                /* if no service address is set, zero counters in all */
                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-                       ret = ip_vs_zero_all();
+                       ret = ip_vs_zero_all(net);
                        goto out_unlock;
                }
        }
@@ -2165,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
        /* Lookup the exact service by <protocol, addr, port> or fwmark */
        if (usvc.fwmark == 0)
-               svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+               svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
                                           &usvc.addr, usvc.port);
        else
-               svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+               svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
 
        if (cmd != IP_VS_SO_SET_ADD
            && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                if (svc != NULL)
                        ret = -EEXIST;
                else
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                break;
        case IP_VS_SO_SET_EDIT:
                ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+                           const struct ip_vs_get_services *get,
                            struct ip_vs_get_services __user *uptr)
 {
        int idx, count=0;
@@ -2252,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
                        /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                continue;
 
                        if (count >= get->num_services)
@@ -2271,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
                        /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                continue;
 
                        if (count >= get->num_services)
@@ -2291,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }
 
 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                         struct ip_vs_get_dests __user *uptr)
 {
        struct ip_vs_service *svc;
@@ -2299,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
        int ret = 0;
 
        if (get->fwmark)
-               svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+               svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
        else
-               svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+               svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
                                           get->port);
 
        if (svc) {
@@ -2336,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 }
 
 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+       struct ip_vs_proto_data *pd;
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-       u->tcp_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-       u->tcp_fin_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+       pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+       u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S