]> nv-tegra.nvidia Code Review - linux-2.6.git/blob - net/bridge/br_multicast.c
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6.git] / net / bridge / br_multicast.c
1 /*
2  * Bridge multicast support.
3  *
4  * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option)
9  * any later version.
10  *
11  */
12
13 #include <linux/err.h>
14 #include <linux/if_ether.h>
15 #include <linux/igmp.h>
16 #include <linux/jhash.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/netdevice.h>
20 #include <linux/netfilter_bridge.h>
21 #include <linux/random.h>
22 #include <linux/rculist.h>
23 #include <linux/skbuff.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <net/ip.h>
27
28 #include "br_private.h"
29
30 static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
31 {
32         return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
33 }
34
35 static struct net_bridge_mdb_entry *__br_mdb_ip_get(
36         struct net_bridge_mdb_htable *mdb, __be32 dst, int hash)
37 {
38         struct net_bridge_mdb_entry *mp;
39         struct hlist_node *p;
40
41         hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
42                 if (dst == mp->addr)
43                         return mp;
44         }
45
46         return NULL;
47 }
48
49 static struct net_bridge_mdb_entry *br_mdb_ip_get(
50         struct net_bridge_mdb_htable *mdb, __be32 dst)
51 {
52         if (!mdb)
53                 return NULL;
54
55         return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
56 }
57
58 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
59                                         struct sk_buff *skb)
60 {
61         if (br->multicast_disabled)
62                 return NULL;
63
64         switch (skb->protocol) {
65         case htons(ETH_P_IP):
66                 if (BR_INPUT_SKB_CB(skb)->igmp)
67                         break;
68                 return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr);
69         }
70
71         return NULL;
72 }
73
74 static void br_mdb_free(struct rcu_head *head)
75 {
76         struct net_bridge_mdb_htable *mdb =
77                 container_of(head, struct net_bridge_mdb_htable, rcu);
78         struct net_bridge_mdb_htable *old = mdb->old;
79
80         mdb->old = NULL;
81         kfree(old->mhash);
82         kfree(old);
83 }
84
85 static int br_mdb_copy(struct net_bridge_mdb_htable *new,
86                        struct net_bridge_mdb_htable *old,
87                        int elasticity)
88 {
89         struct net_bridge_mdb_entry *mp;
90         struct hlist_node *p;
91         int maxlen;
92         int len;
93         int i;
94
95         for (i = 0; i < old->max; i++)
96                 hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver])
97                         hlist_add_head(&mp->hlist[new->ver],
98                                        &new->mhash[br_ip_hash(new, mp->addr)]);
99
100         if (!elasticity)
101                 return 0;
102
103         maxlen = 0;
104         for (i = 0; i < new->max; i++) {
105                 len = 0;
106                 hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver])
107                         len++;
108                 if (len > maxlen)
109                         maxlen = len;
110         }
111
112         return maxlen > elasticity ? -EINVAL : 0;
113 }
114
115 static void br_multicast_free_pg(struct rcu_head *head)
116 {
117         struct net_bridge_port_group *p =
118                 container_of(head, struct net_bridge_port_group, rcu);
119
120         kfree(p);
121 }
122
123 static void br_multicast_free_group(struct rcu_head *head)
124 {
125         struct net_bridge_mdb_entry *mp =
126                 container_of(head, struct net_bridge_mdb_entry, rcu);
127
128         kfree(mp);
129 }
130
131 static void br_multicast_group_expired(unsigned long data)
132 {
133         struct net_bridge_mdb_entry *mp = (void *)data;
134         struct net_bridge *br = mp->br;
135         struct net_bridge_mdb_htable *mdb;
136
137         spin_lock(&br->multicast_lock);
138         if (!netif_running(br->dev) || timer_pending(&mp->timer))
139                 goto out;
140
141         if (!hlist_unhashed(&mp->mglist))
142                 hlist_del_init(&mp->mglist);
143
144         if (mp->ports)
145                 goto out;
146
147         mdb = br->mdb;
148         hlist_del_rcu(&mp->hlist[mdb->ver]);
149         mdb->size--;
150
151         del_timer(&mp->query_timer);
152         call_rcu_bh(&mp->rcu, br_multicast_free_group);
153
154 out:
155         spin_unlock(&br->multicast_lock);
156 }
157
158 static void br_multicast_del_pg(struct net_bridge *br,
159                                 struct net_bridge_port_group *pg)
160 {
161         struct net_bridge_mdb_htable *mdb = br->mdb;
162         struct net_bridge_mdb_entry *mp;
163         struct net_bridge_port_group *p;
164         struct net_bridge_port_group **pp;
165
166         mp = br_mdb_ip_get(mdb, pg->addr);
167         if (WARN_ON(!mp))
168                 return;
169
170         for (pp = &mp->ports; (p = *pp); pp = &p->next) {
171                 if (p != pg)
172                         continue;
173
174                 *pp = p->next;
175                 hlist_del_init(&p->mglist);
176                 del_timer(&p->timer);
177                 del_timer(&p->query_timer);
178                 call_rcu_bh(&p->rcu, br_multicast_free_pg);
179
180                 if (!mp->ports && hlist_unhashed(&mp->mglist) &&
181                     netif_running(br->dev))
182                         mod_timer(&mp->timer, jiffies);
183
184                 return;
185         }
186
187         WARN_ON(1);
188 }
189
190 static void br_multicast_port_group_expired(unsigned long data)
191 {
192         struct net_bridge_port_group *pg = (void *)data;
193         struct net_bridge *br = pg->port->br;
194
195         spin_lock(&br->multicast_lock);
196         if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
197             hlist_unhashed(&pg->mglist))
198                 goto out;
199
200         br_multicast_del_pg(br, pg);
201
202 out:
203         spin_unlock(&br->multicast_lock);
204 }
205
206 static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max,
207                          int elasticity)
208 {
209         struct net_bridge_mdb_htable *old = *mdbp;
210         struct net_bridge_mdb_htable *mdb;
211         int err;
212
213         mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC);
214         if (!mdb)
215                 return -ENOMEM;
216
217         mdb->max = max;
218         mdb->old = old;
219
220         mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC);
221         if (!mdb->mhash) {
222                 kfree(mdb);
223                 return -ENOMEM;
224         }
225
226         mdb->size = old ? old->size : 0;
227         mdb->ver = old ? old->ver ^ 1 : 0;
228
229         if (!old || elasticity)
230                 get_random_bytes(&mdb->secret, sizeof(mdb->secret));
231         else
232                 mdb->secret = old->secret;
233
234         if (!old)
235                 goto out;
236
237         err = br_mdb_copy(mdb, old, elasticity);
238         if (err) {
239                 kfree(mdb->mhash);
240                 kfree(mdb);
241                 return err;
242         }
243
244         call_rcu_bh(&mdb->rcu, br_mdb_free);
245
246 out:
247         rcu_assign_pointer(*mdbp, mdb);
248
249         return 0;
250 }
251
252 static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
253                                                 __be32 group)
254 {
255         struct sk_buff *skb;
256         struct igmphdr *ih;
257         struct ethhdr *eth;
258         struct iphdr *iph;
259
260         skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
261                                                  sizeof(*ih) + 4);
262         if (!skb)
263                 goto out;
264
265         skb->protocol = htons(ETH_P_IP);
266
267         skb_reset_mac_header(skb);
268         eth = eth_hdr(skb);
269
270         memcpy(eth->h_source, br->dev->dev_addr, 6);
271         eth->h_dest[0] = 1;
272         eth->h_dest[1] = 0;
273         eth->h_dest[2] = 0x5e;
274         eth->h_dest[3] = 0;
275         eth->h_dest[4] = 0;
276         eth->h_dest[5] = 1;
277         eth->h_proto = htons(ETH_P_IP);
278         skb_put(skb, sizeof(*eth));
279
280         skb_set_network_header(skb, skb->len);
281         iph = ip_hdr(skb);
282
283         iph->version = 4;
284         iph->ihl = 6;
285         iph->tos = 0xc0;
286         iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4);
287         iph->id = 0;
288         iph->frag_off = htons(IP_DF);
289         iph->ttl = 1;
290         iph->protocol = IPPROTO_IGMP;
291         iph->saddr = 0;
292         iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
293         ((u8 *)&iph[1])[0] = IPOPT_RA;
294         ((u8 *)&iph[1])[1] = 4;
295         ((u8 *)&iph[1])[2] = 0;
296         ((u8 *)&iph[1])[3] = 0;
297         ip_send_check(iph);
298         skb_put(skb, 24);
299
300         skb_set_transport_header(skb, skb->len);
301         ih = igmp_hdr(skb);
302         ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
303         ih->code = (group ? br->multicast_last_member_interval :
304                             br->multicast_query_response_interval) /
305                    (HZ / IGMP_TIMER_SCALE);
306         ih->group = group;
307         ih->csum = 0;
308         ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
309         skb_put(skb, sizeof(*ih));
310
311         __skb_pull(skb, sizeof(*eth));
312
313 out:
314         return skb;
315 }
316
317 static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp)
318 {
319         struct net_bridge *br = mp->br;
320         struct sk_buff *skb;
321
322         skb = br_multicast_alloc_query(br, mp->addr);
323         if (!skb)
324                 goto timer;
325
326         netif_rx(skb);
327
328 timer:
329         if (++mp->queries_sent < br->multicast_last_member_count)
330                 mod_timer(&mp->query_timer,
331                           jiffies + br->multicast_last_member_interval);
332 }
333
334 static void br_multicast_group_query_expired(unsigned long data)
335 {
336         struct net_bridge_mdb_entry *mp = (void *)data;
337         struct net_bridge *br = mp->br;
338
339         spin_lock(&br->multicast_lock);
340         if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) ||
341             mp->queries_sent >= br->multicast_last_member_count)
342                 goto out;
343
344         br_multicast_send_group_query(mp);
345
346 out:
347         spin_unlock(&br->multicast_lock);
348 }
349
350 static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg)
351 {
352         struct net_bridge_port *port = pg->port;
353         struct net_bridge *br = port->br;
354         struct sk_buff *skb;
355
356         skb = br_multicast_alloc_query(br, pg->addr);
357         if (!skb)
358                 goto timer;
359
360         br_deliver(port, skb);
361
362 timer:
363         if (++pg->queries_sent < br->multicast_last_member_count)
364                 mod_timer(&pg->query_timer,
365                           jiffies + br->multicast_last_member_interval);
366 }
367
368 static void br_multicast_port_group_query_expired(unsigned long data)
369 {
370         struct net_bridge_port_group *pg = (void *)data;
371         struct net_bridge_port *port = pg->port;
372         struct net_bridge *br = port->br;
373
374         spin_lock(&br->multicast_lock);
375         if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) ||
376             pg->queries_sent >= br->multicast_last_member_count)
377                 goto out;
378
379         br_multicast_send_port_group_query(pg);
380
381 out:
382         spin_unlock(&br->multicast_lock);
383 }
384
385 static struct net_bridge_mdb_entry *br_multicast_get_group(
386         struct net_bridge *br, struct net_bridge_port *port, __be32 group,
387         int hash)
388 {
389         struct net_bridge_mdb_htable *mdb = br->mdb;
390         struct net_bridge_mdb_entry *mp;
391         struct hlist_node *p;
392         unsigned count = 0;
393         unsigned max;
394         int elasticity;
395         int err;
396
397         hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
398                 count++;
399                 if (unlikely(group == mp->addr)) {
400                         return mp;
401                 }
402         }
403
404         elasticity = 0;
405         max = mdb->max;
406
407         if (unlikely(count > br->hash_elasticity && count)) {
408                 if (net_ratelimit())
409                         printk(KERN_INFO "%s: Multicast hash table "
410                                "chain limit reached: %s\n",
411                                br->dev->name, port ? port->dev->name :
412                                                      br->dev->name);
413
414                 elasticity = br->hash_elasticity;
415         }
416
417         if (mdb->size >= max) {
418                 max *= 2;
419                 if (unlikely(max >= br->hash_max)) {
420                         printk(KERN_WARNING "%s: Multicast hash table maximum "
421                                "reached, disabling snooping: %s, %d\n",
422                                br->dev->name, port ? port->dev->name :
423                                                      br->dev->name,
424                                max);
425                         err = -E2BIG;
426 disable:
427                         br->multicast_disabled = 1;
428                         goto err;
429                 }
430         }
431
432         if (max > mdb->max || elasticity) {
433                 if (mdb->old) {
434                         if (net_ratelimit())
435                                 printk(KERN_INFO "%s: Multicast hash table "
436                                        "on fire: %s\n",
437                                        br->dev->name, port ? port->dev->name :
438                                                              br->dev->name);
439                         err = -EEXIST;
440                         goto err;
441                 }
442
443                 err = br_mdb_rehash(&br->mdb, max, elasticity);
444                 if (err) {
445                         printk(KERN_WARNING "%s: Cannot rehash multicast "
446                                "hash table, disabling snooping: "
447                                "%s, %d, %d\n",
448                                br->dev->name, port ? port->dev->name :
449                                                      br->dev->name,
450                                mdb->size, err);
451                         goto disable;
452                 }
453
454                 err = -EAGAIN;
455                 goto err;
456         }
457
458         return NULL;
459
460 err:
461         mp = ERR_PTR(err);
462         return mp;
463 }
464
465 static struct net_bridge_mdb_entry *br_multicast_new_group(
466         struct net_bridge *br, struct net_bridge_port *port, __be32 group)
467 {
468         struct net_bridge_mdb_htable *mdb = br->mdb;
469         struct net_bridge_mdb_entry *mp;
470         int hash;
471
472         if (!mdb) {
473                 if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0))
474                         return NULL;
475                 goto rehash;
476         }
477
478         hash = br_ip_hash(mdb, group);
479         mp = br_multicast_get_group(br, port, group, hash);
480         switch (PTR_ERR(mp)) {
481         case 0:
482                 break;
483
484         case -EAGAIN:
485 rehash:
486                 mdb = br->mdb;
487                 hash = br_ip_hash(mdb, group);
488                 break;
489
490         default:
491                 goto out;
492         }
493
494         mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
495         if (unlikely(!mp))
496                 goto out;
497
498         mp->br = br;
499         mp->addr = group;
500         setup_timer(&mp->timer, br_multicast_group_expired,
501                     (unsigned long)mp);
502         setup_timer(&mp->query_timer, br_multicast_group_query_expired,
503                     (unsigned long)mp);
504
505         hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
506         mdb->size++;
507
508 out:
509         return mp;
510 }
511
512 static int br_multicast_add_group(struct net_bridge *br,
513                                   struct net_bridge_port *port, __be32 group)
514 {
515         struct net_bridge_mdb_entry *mp;
516         struct net_bridge_port_group *p;
517         struct net_bridge_port_group **pp;
518         unsigned long now = jiffies;
519         int err;
520
521         if (ipv4_is_local_multicast(group))
522                 return 0;
523
524         spin_lock(&br->multicast_lock);
525         if (!netif_running(br->dev) ||
526             (port && port->state == BR_STATE_DISABLED))
527                 goto out;
528
529         mp = br_multicast_new_group(br, port, group);
530         err = PTR_ERR(mp);
531         if (unlikely(IS_ERR(mp) || !mp))
532                 goto err;
533
534         if (!port) {
535                 hlist_add_head(&mp->mglist, &br->mglist);
536                 mod_timer(&mp->timer, now + br->multicast_membership_interval);
537                 goto out;
538         }
539
540         for (pp = &mp->ports; (p = *pp); pp = &p->next) {
541                 if (p->port == port)
542                         goto found;
543                 if ((unsigned long)p->port < (unsigned long)port)
544                         break;
545         }
546
547         p = kzalloc(sizeof(*p), GFP_ATOMIC);
548         err = -ENOMEM;
549         if (unlikely(!p))
550                 goto err;
551
552         p->addr = group;
553         p->port = port;
554         p->next = *pp;
555         hlist_add_head(&p->mglist, &port->mglist);
556         setup_timer(&p->timer, br_multicast_port_group_expired,
557                     (unsigned long)p);
558         setup_timer(&p->query_timer, br_multicast_port_group_query_expired,
559                     (unsigned long)p);
560
561         rcu_assign_pointer(*pp, p);
562
563 found:
564         mod_timer(&p->timer, now + br->multicast_membership_interval);
565 out:
566         err = 0;
567
568 err:
569         spin_unlock(&br->multicast_lock);
570         return err;
571 }
572
573 static void br_multicast_router_expired(unsigned long data)
574 {
575         struct net_bridge_port *port = (void *)data;
576         struct net_bridge *br = port->br;
577
578         spin_lock(&br->multicast_lock);
579         if (port->multicast_router != 1 ||
580             timer_pending(&port->multicast_router_timer) ||
581             hlist_unhashed(&port->rlist))
582                 goto out;
583
584         hlist_del_init_rcu(&port->rlist);
585
586 out:
587         spin_unlock(&br->multicast_lock);
588 }
589
/* Timer handler that intentionally does nothing; @data is unused. */
static void br_multicast_local_router_expired(unsigned long data)
{
        /* nothing to do */
}
593
594 static void br_multicast_send_query(struct net_bridge *br,
595                                     struct net_bridge_port *port, u32 sent)
596 {
597         unsigned long time;
598         struct sk_buff *skb;
599
600         if (!netif_running(br->dev) || br->multicast_disabled ||
601             timer_pending(&br->multicast_querier_timer))
602                 return;
603
604         skb = br_multicast_alloc_query(br, 0);
605         if (!skb)
606                 goto timer;
607
608         if (port) {
609                 __skb_push(skb, sizeof(struct ethhdr));
610                 skb->dev = port->dev;
611                 NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
612                         dev_queue_xmit);
613         } else
614                 netif_rx(skb);
615
616 timer:
617         time = jiffies;
618         time += sent < br->multicast_startup_query_count ?
619                 br->multicast_startup_query_interval :
620                 br->multicast_query_interval;
621         mod_timer(port ? &port->multicast_query_timer :
622                          &br->multicast_query_timer, time);
623 }
624
625 static void br_multicast_port_query_expired(unsigned long data)
626 {
627         struct net_bridge_port *port = (void *)data;
628         struct net_bridge *br = port->br;
629
630         spin_lock(&br->multicast_lock);
631         if (port->state == BR_STATE_DISABLED ||
632             port->state == BR_STATE_BLOCKING)
633                 goto out;
634
635         if (port->multicast_startup_queries_sent <
636             br->multicast_startup_query_count)
637                 port->multicast_startup_queries_sent++;
638
639         br_multicast_send_query(port->br, port,
640                                 port->multicast_startup_queries_sent);
641
642 out:
643         spin_unlock(&br->multicast_lock);
644 }
645
646 void br_multicast_add_port(struct net_bridge_port *port)
647 {
648         port->multicast_router = 1;
649
650         setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
651                     (unsigned long)port);
652         setup_timer(&port->multicast_query_timer,
653                     br_multicast_port_query_expired, (unsigned long)port);
654 }
655
656 void br_multicast_del_port(struct net_bridge_port *port)
657 {
658         del_timer_sync(&port->multicast_router_timer);
659 }
660
661 static void __br_multicast_enable_port(struct net_bridge_port *port)
662 {
663         port->multicast_startup_queries_sent = 0;
664
665         if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 ||
666             del_timer(&port->multicast_query_timer))
667                 mod_timer(&port->multicast_query_timer, jiffies);
668 }
669
670 void br_multicast_enable_port(struct net_bridge_port *port)
671 {
672         struct net_bridge *br = port->br;
673
674         spin_lock(&br->multicast_lock);
675         if (br->multicast_disabled || !netif_running(br->dev))
676                 goto out;
677
678         __br_multicast_enable_port(port);
679
680 out:
681         spin_unlock(&br->multicast_lock);
682 }
683
684 void br_multicast_disable_port(struct net_bridge_port *port)
685 {
686         struct net_bridge *br = port->br;
687         struct net_bridge_port_group *pg;
688         struct hlist_node *p, *n;
689
690         spin_lock(&br->multicast_lock);
691         hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist)
692                 br_multicast_del_pg(br, pg);
693
694         if (!hlist_unhashed(&port->rlist))
695                 hlist_del_init_rcu(&port->rlist);
696         del_timer(&port->multicast_router_timer);
697         del_timer(&port->multicast_query_timer);
698         spin_unlock(&br->multicast_lock);
699 }
700
701 static int br_multicast_igmp3_report(struct net_bridge *br,
702                                      struct net_bridge_port *port,
703                                      struct sk_buff *skb)
704 {
705         struct igmpv3_report *ih;
706         struct igmpv3_grec *grec;
707         int i;
708         int len;
709         int num;
710         int type;
711         int err = 0;
712         __be32 group;
713
714         if (!pskb_may_pull(skb, sizeof(*ih)))
715                 return -EINVAL;
716
717         ih = igmpv3_report_hdr(skb);
718         num = ntohs(ih->ngrec);
719         len = sizeof(*ih);
720
721         for (i = 0; i < num; i++) {
722                 len += sizeof(*grec);
723                 if (!pskb_may_pull(skb, len))
724                         return -EINVAL;
725
726                 grec = (void *)(skb->data + len - sizeof(*grec));
727                 group = grec->grec_mca;
728                 type = grec->grec_type;
729
730                 len += ntohs(grec->grec_nsrcs) * 4;
731                 if (!pskb_may_pull(skb, len))
732                         return -EINVAL;
733
734                 /* We treat this as an IGMPv2 report for now. */
735                 switch (type) {
736                 case IGMPV3_MODE_IS_INCLUDE:
737                 case IGMPV3_MODE_IS_EXCLUDE:
738                 case IGMPV3_CHANGE_TO_INCLUDE:
739                 case IGMPV3_CHANGE_TO_EXCLUDE:
740                 case IGMPV3_ALLOW_NEW_SOURCES:
741                 case IGMPV3_BLOCK_OLD_SOURCES:
742                         break;
743
744                 default:
745                         continue;
746                 }
747
748                 err = br_multicast_add_group(br, port, group);
749                 if (err)
750                         break;
751         }
752
753         return err;
754 }
755
756 static void br_multicast_add_router(struct net_bridge *br,
757                                     struct net_bridge_port *port)
758 {
759         struct hlist_node *p;
760         struct hlist_node **h;
761
762         for (h = &br->router_list.first;
763              (p = *h) &&
764              (unsigned long)container_of(p, struct net_bridge_port, rlist) >
765              (unsigned long)port;
766              h = &p->next)
767                 ;
768
769         port->rlist.pprev = h;
770         port->rlist.next = p;
771         rcu_assign_pointer(*h, &port->rlist);
772         if (p)
773                 p->pprev = &port->rlist.next;
774 }
775
776 static void br_multicast_mark_router(struct net_bridge *br,
777                                      struct net_bridge_port *port)
778 {
779         unsigned long now = jiffies;
780
781         if (!port) {
782                 if (br->multicast_router == 1)
783                         mod_timer(&br->multicast_router_timer,
784                                   now + br->multicast_querier_interval);
785                 return;
786         }
787
788         if (port->multicast_router != 1)
789                 return;
790
791         if (!hlist_unhashed(&port->rlist))
792                 goto timer;
793
794         br_multicast_add_router(br, port);
795
796 timer:
797         mod_timer(&port->multicast_router_timer,
798                   now + br->multicast_querier_interval);
799 }
800
801 static void br_multicast_query_received(struct net_bridge *br,
802                                         struct net_bridge_port *port,
803                                         __be32 saddr)
804 {
805         if (saddr)
806                 mod_timer(&br->multicast_querier_timer,
807                           jiffies + br->multicast_querier_interval);
808         else if (timer_pending(&br->multicast_querier_timer))
809                 return;
810
811         br_multicast_mark_router(br, port);
812 }
813
814 static int br_multicast_query(struct net_bridge *br,
815                               struct net_bridge_port *port,
816                               struct sk_buff *skb)
817 {
818         struct iphdr *iph = ip_hdr(skb);
819         struct igmphdr *ih = igmp_hdr(skb);
820         struct net_bridge_mdb_entry *mp;
821         struct igmpv3_query *ih3;
822         struct net_bridge_port_group *p;
823         struct net_bridge_port_group **pp;
824         unsigned long max_delay;
825         unsigned long now = jiffies;
826         __be32 group;
827         int err = 0;
828
829         spin_lock(&br->multicast_lock);
830         if (!netif_running(br->dev) ||
831             (port && port->state == BR_STATE_DISABLED))
832                 goto out;
833
834         br_multicast_query_received(br, port, iph->saddr);
835
836         group = ih->group;
837
838         if (skb->len == sizeof(*ih)) {
839                 max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
840
841                 if (!max_delay) {
842                         max_delay = 10 * HZ;
843                         group = 0;
844                 }
845         } else {
846                 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) {
847                         err = -EINVAL;
848                         goto out;
849                 }
850
851                 ih3 = igmpv3_query_hdr(skb);
852                 if (ih3->nsrcs)
853                         goto out;
854
855                 max_delay = ih3->code ?
856                             IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
857         }
858
859         if (!group)
860                 goto out;
861
862         mp = br_mdb_ip_get(br->mdb, group);
863         if (!mp)
864                 goto out;
865
866         max_delay *= br->multicast_last_member_count;
867
868         if (!hlist_unhashed(&mp->mglist) &&
869             (timer_pending(&mp->timer) ?
870              time_after(mp->timer.expires, now + max_delay) :
871              try_to_del_timer_sync(&mp->timer) >= 0))
872                 mod_timer(&mp->timer, now + max_delay);
873
874         for (pp = &mp->ports; (p = *pp); pp = &p->next) {
875                 if (timer_pending(&p->timer) ?
876                     time_after(p->timer.expires, now + max_delay) :
877                     try_to_del_timer_sync(&p->timer) >= 0)
878                         mod_timer(&mp->timer, now + max_delay);
879         }
880
881 out:
882         spin_unlock(&br->multicast_lock);
883         return err;
884 }
885
886 static void br_multicast_leave_group(struct net_bridge *br,
887                                      struct net_bridge_port *port,
888                                      __be32 group)
889 {
890         struct net_bridge_mdb_htable *mdb;
891         struct net_bridge_mdb_entry *mp;
892         struct net_bridge_port_group *p;
893         unsigned long now;
894         unsigned long time;
895
896         if (ipv4_is_local_multicast(group))
897                 return;
898
899         spin_lock(&br->multicast_lock);
900         if (!netif_running(br->dev) ||
901             (port && port->state == BR_STATE_DISABLED) ||
902             timer_pending(&br->multicast_querier_timer))
903                 goto out;
904
905         mdb = br->mdb;
906         mp = br_mdb_ip_get(mdb, group);
907         if (!mp)
908                 goto out;
909
910         now = jiffies;
911         time = now + br->multicast_last_member_count *
912                      br->multicast_last_member_interval;
913
914         if (!port) {
915                 if (!hlist_unhashed(&mp->mglist) &&
916                     (timer_pending(&mp->timer) ?
917                      time_after(mp->timer.expires, time) :
918                      try_to_del_timer_sync(&mp->timer) >= 0)) {
919                         mod_timer(&mp->timer, time);
920
921                         mp->queries_sent = 0;
922                         mod_timer(&mp->query_timer, now);
923                 }
924
925                 goto out;
926         }
927
928         for (p = mp->ports; p; p = p->next) {
929                 if (p->port != port)
930                         continue;
931
932                 if (!hlist_unhashed(&p->mglist) &&
933                     (timer_pending(&p->timer) ?
934                      time_after(p->timer.expires, time) :
935                      try_to_del_timer_sync(&p->timer) >= 0)) {
936                         mod_timer(&p->timer, time);
937
938                         p->queries_sent = 0;
939                         mod_timer(&p->query_timer, now);
940                 }
941
942                 break;
943         }
944
945 out:
946         spin_unlock(&br->multicast_lock);
947 }
948
949 static int br_multicast_ipv4_rcv(struct net_bridge *br,
950                                  struct net_bridge_port *port,
951                                  struct sk_buff *skb)
952 {
953         struct sk_buff *skb2 = skb;
954         struct iphdr *iph;
955         struct igmphdr *ih;
956         unsigned len;
957         unsigned offset;
958         int err;
959
960         BR_INPUT_SKB_CB(skb)->igmp = 0;
961         BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
962
963         /* We treat OOM as packet loss for now. */
964         if (!pskb_may_pull(skb, sizeof(*iph)))
965                 return -EINVAL;
966
967         iph = ip_hdr(skb);
968
969         if (iph->ihl < 5 || iph->version != 4)
970                 return -EINVAL;
971
972         if (!pskb_may_pull(skb, ip_hdrlen(skb)))
973                 return -EINVAL;
974
975         iph = ip_hdr(skb);
976
977         if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
978                 return -EINVAL;
979
980         if (iph->protocol != IPPROTO_IGMP)
981                 return 0;
982
983         len = ntohs(iph->tot_len);
984         if (skb->len < len || len < ip_hdrlen(skb))
985                 return -EINVAL;
986
987         if (skb->len > len) {
988                 skb2 = skb_clone(skb, GFP_ATOMIC);
989                 if (!skb2)
990                         return -ENOMEM;
991
992                 err = pskb_trim_rcsum(skb2, len);
993                 if (err)
994                         goto err_out;
995         }
996
997         len -= ip_hdrlen(skb2);
998         offset = skb_network_offset(skb2) + ip_hdrlen(skb2);
999         __skb_pull(skb2, offset);
1000         skb_reset_transport_header(skb2);
1001
1002         err = -EINVAL;
1003         if (!pskb_may_pull(skb2, sizeof(*ih)))
1004                 goto out;
1005
1006         switch (skb2->ip_summed) {
1007         case CHECKSUM_COMPLETE:
1008                 if (!csum_fold(skb2->csum))
1009                         break;
1010                 /* fall through */
1011         case CHECKSUM_NONE:
1012                 skb2->csum = 0;
1013                 if (skb_checksum_complete(skb2))
1014                         goto out;
1015         }
1016
1017         err = 0;
1018
1019         BR_INPUT_SKB_CB(skb)->igmp = 1;
1020         ih = igmp_hdr(skb2);
1021
1022         switch (ih->type) {
1023         case IGMP_HOST_MEMBERSHIP_REPORT:
1024         case IGMPV2_HOST_MEMBERSHIP_REPORT:
1025                 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
1026                 err = br_multicast_add_group(br, port, ih->group);
1027                 break;
1028         case IGMPV3_HOST_MEMBERSHIP_REPORT:
1029                 err = br_multicast_igmp3_report(br, port, skb2);
1030                 break;
1031         case IGMP_HOST_MEMBERSHIP_QUERY:
1032                 err = br_multicast_query(br, port, skb2);
1033                 break;
1034         case IGMP_HOST_LEAVE_MESSAGE:
1035                 br_multicast_leave_group(br, port, ih->group);
1036                 break;
1037         }
1038
1039 out:
1040         __skb_push(skb2, offset);
1041 err_out:
1042         if (skb2 != skb)
1043                 kfree_skb(skb2);
1044         return err;
1045 }
1046
1047 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
1048                      struct sk_buff *skb)
1049 {
1050         if (br->multicast_disabled)
1051                 return 0;
1052
1053         switch (skb->protocol) {
1054         case htons(ETH_P_IP):
1055                 return br_multicast_ipv4_rcv(br, port, skb);
1056         }
1057
1058         return 0;
1059 }
1060
1061 static void br_multicast_query_expired(unsigned long data)
1062 {
1063         struct net_bridge *br = (void *)data;
1064
1065         spin_lock(&br->multicast_lock);
1066         if (br->multicast_startup_queries_sent <
1067             br->multicast_startup_query_count)
1068                 br->multicast_startup_queries_sent++;
1069
1070         br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent);
1071
1072         spin_unlock(&br->multicast_lock);
1073 }
1074
1075 void br_multicast_init(struct net_bridge *br)
1076 {
1077         br->hash_elasticity = 4;
1078         br->hash_max = 512;
1079
1080         br->multicast_router = 1;
1081         br->multicast_last_member_count = 2;
1082         br->multicast_startup_query_count = 2;
1083
1084         br->multicast_last_member_interval = HZ;
1085         br->multicast_query_response_interval = 10 * HZ;
1086         br->multicast_startup_query_interval = 125 * HZ / 4;
1087         br->multicast_query_interval = 125 * HZ;
1088         br->multicast_querier_interval = 255 * HZ;
1089         br->multicast_membership_interval = 260 * HZ;
1090
1091         spin_lock_init(&br->multicast_lock);
1092         setup_timer(&br->multicast_router_timer,
1093                     br_multicast_local_router_expired, 0);
1094         setup_timer(&br->multicast_querier_timer,
1095                     br_multicast_local_router_expired, 0);
1096         setup_timer(&br->multicast_query_timer, br_multicast_query_expired,
1097                     (unsigned long)br);
1098 }
1099
1100 void br_multicast_open(struct net_bridge *br)
1101 {
1102         br->multicast_startup_queries_sent = 0;
1103
1104         if (br->multicast_disabled)
1105                 return;
1106
1107         mod_timer(&br->multicast_query_timer, jiffies);
1108 }
1109
1110 void br_multicast_stop(struct net_bridge *br)
1111 {
1112         struct net_bridge_mdb_htable *mdb;
1113         struct net_bridge_mdb_entry *mp;
1114         struct hlist_node *p, *n;
1115         u32 ver;
1116         int i;
1117
1118         del_timer_sync(&br->multicast_router_timer);
1119         del_timer_sync(&br->multicast_querier_timer);
1120         del_timer_sync(&br->multicast_query_timer);
1121
1122         spin_lock_bh(&br->multicast_lock);
1123         mdb = br->mdb;
1124         if (!mdb)
1125                 goto out;
1126
1127         br->mdb = NULL;
1128
1129         ver = mdb->ver;
1130         for (i = 0; i < mdb->max; i++) {
1131                 hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i],
1132                                           hlist[ver]) {
1133                         del_timer(&mp->timer);
1134                         del_timer(&mp->query_timer);
1135                         call_rcu_bh(&mp->rcu, br_multicast_free_group);
1136                 }
1137         }
1138
1139         if (mdb->old) {
1140                 spin_unlock_bh(&br->multicast_lock);
1141                 rcu_barrier_bh();
1142                 spin_lock_bh(&br->multicast_lock);
1143                 WARN_ON(mdb->old);
1144         }
1145
1146         mdb->old = mdb;
1147         call_rcu_bh(&mdb->rcu, br_mdb_free);
1148
1149 out:
1150         spin_unlock_bh(&br->multicast_lock);
1151 }
1152
1153 int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1154 {
1155         int err = -ENOENT;
1156
1157         spin_lock_bh(&br->multicast_lock);
1158         if (!netif_running(br->dev))
1159                 goto unlock;
1160
1161         switch (val) {
1162         case 0:
1163         case 2:
1164                 del_timer(&br->multicast_router_timer);
1165                 /* fall through */
1166         case 1:
1167                 br->multicast_router = val;
1168                 err = 0;
1169                 break;
1170
1171         default:
1172                 err = -EINVAL;
1173                 break;
1174         }
1175
1176 unlock:
1177         spin_unlock_bh(&br->multicast_lock);
1178
1179         return err;
1180 }
1181
1182 int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
1183 {
1184         struct net_bridge *br = p->br;
1185         int err = -ENOENT;
1186
1187         spin_lock(&br->multicast_lock);
1188         if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED)
1189                 goto unlock;
1190
1191         switch (val) {
1192         case 0:
1193         case 1:
1194         case 2:
1195                 p->multicast_router = val;
1196                 err = 0;
1197
1198                 if (val < 2 && !hlist_unhashed(&p->rlist))
1199                         hlist_del_init_rcu(&p->rlist);
1200
1201                 if (val == 1)
1202                         break;
1203
1204                 del_timer(&p->multicast_router_timer);
1205
1206                 if (val == 0)
1207                         break;
1208
1209                 br_multicast_add_router(br, p);
1210                 break;
1211
1212         default:
1213                 err = -EINVAL;
1214                 break;
1215         }
1216
1217 unlock:
1218         spin_unlock(&br->multicast_lock);
1219
1220         return err;
1221 }
1222
1223 int br_multicast_toggle(struct net_bridge *br, unsigned long val)
1224 {
1225         struct net_bridge_port *port;
1226         int err = -ENOENT;
1227
1228         spin_lock(&br->multicast_lock);
1229         if (!netif_running(br->dev))
1230                 goto unlock;
1231
1232         err = 0;
1233         if (br->multicast_disabled == !val)
1234                 goto unlock;
1235
1236         br->multicast_disabled = !val;
1237         if (br->multicast_disabled)
1238                 goto unlock;
1239
1240         if (br->mdb) {
1241                 if (br->mdb->old) {
1242                         err = -EEXIST;
1243 rollback:
1244                         br->multicast_disabled = !!val;
1245                         goto unlock;
1246                 }
1247
1248                 err = br_mdb_rehash(&br->mdb, br->mdb->max,
1249                                     br->hash_elasticity);
1250                 if (err)
1251                         goto rollback;
1252         }
1253
1254         br_multicast_open(br);
1255         list_for_each_entry(port, &br->port_list, list) {
1256                 if (port->state == BR_STATE_DISABLED ||
1257                     port->state == BR_STATE_BLOCKING)
1258                         continue;
1259
1260                 __br_multicast_enable_port(port);
1261         }
1262
1263 unlock:
1264         spin_unlock(&br->multicast_lock);
1265
1266         return err;
1267 }
1268
1269 int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
1270 {
1271         int err = -ENOENT;
1272         u32 old;
1273
1274         spin_lock(&br->multicast_lock);
1275         if (!netif_running(br->dev))
1276                 goto unlock;
1277
1278         err = -EINVAL;
1279         if (!is_power_of_2(val))
1280                 goto unlock;
1281         if (br->mdb && val < br->mdb->size)
1282                 goto unlock;
1283
1284         err = 0;
1285
1286         old = br->hash_max;
1287         br->hash_max = val;
1288
1289         if (br->mdb) {
1290                 if (br->mdb->old) {
1291                         err = -EEXIST;
1292 rollback:
1293                         br->hash_max = old;
1294                         goto unlock;
1295                 }
1296
1297                 err = br_mdb_rehash(&br->mdb, br->hash_max,
1298                                     br->hash_elasticity);
1299                 if (err)
1300                         goto rollback;
1301         }
1302
1303 unlock:
1304         spin_unlock(&br->multicast_lock);
1305
1306         return err;
1307 }