bridge: Add hash elasticity/max sysfs entries
[linux-2.6.git] / net / bridge / br_multicast.c
1 /*
2  * Bridge multicast support.
3  *
4  * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option)
9  * any later version.
10  *
11  */
12
13 #include <linux/err.h>
14 #include <linux/if_ether.h>
15 #include <linux/igmp.h>
16 #include <linux/jhash.h>
17 #include <linux/kernel.h>
18 #include <linux/log2.h>
19 #include <linux/netdevice.h>
20 #include <linux/netfilter_bridge.h>
21 #include <linux/random.h>
22 #include <linux/rculist.h>
23 #include <linux/skbuff.h>
24 #include <linux/slab.h>
25 #include <linux/timer.h>
26 #include <net/ip.h>
27
28 #include "br_private.h"
29
30 static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
31 {
32         return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1);
33 }
34
35 static struct net_bridge_mdb_entry *__br_mdb_ip_get(
36         struct net_bridge_mdb_htable *mdb, __be32 dst, int hash)
37 {
38         struct net_bridge_mdb_entry *mp;
39         struct hlist_node *p;
40
41         hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
42                 if (dst == mp->addr)
43                         return mp;
44         }
45
46         return NULL;
47 }
48
49 static struct net_bridge_mdb_entry *br_mdb_ip_get(
50         struct net_bridge_mdb_htable *mdb, __be32 dst)
51 {
52         return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
53 }
54
55 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
56                                         struct sk_buff *skb)
57 {
58         struct net_bridge_mdb_htable *mdb = br->mdb;
59
60         if (!mdb || br->multicast_disabled)
61                 return NULL;
62
63         switch (skb->protocol) {
64         case htons(ETH_P_IP):
65                 if (BR_INPUT_SKB_CB(skb)->igmp)
66                         break;
67                 return br_mdb_ip_get(mdb, ip_hdr(skb)->daddr);
68         }
69
70         return NULL;
71 }
72
73 static void br_mdb_free(struct rcu_head *head)
74 {
75         struct net_bridge_mdb_htable *mdb =
76                 container_of(head, struct net_bridge_mdb_htable, rcu);
77         struct net_bridge_mdb_htable *old = mdb->old;
78
79         mdb->old = NULL;
80         kfree(old->mhash);
81         kfree(old);
82 }
83
84 static int br_mdb_copy(struct net_bridge_mdb_htable *new,
85                        struct net_bridge_mdb_htable *old,
86                        int elasticity)
87 {
88         struct net_bridge_mdb_entry *mp;
89         struct hlist_node *p;
90         int maxlen;
91         int len;
92         int i;
93
94         for (i = 0; i < old->max; i++)
95                 hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver])
96                         hlist_add_head(&mp->hlist[new->ver],
97                                        &new->mhash[br_ip_hash(new, mp->addr)]);
98
99         if (!elasticity)
100                 return 0;
101
102         maxlen = 0;
103         for (i = 0; i < new->max; i++) {
104                 len = 0;
105                 hlist_for_each_entry(mp, p, &new->mhash[i], hlist[new->ver])
106                         len++;
107                 if (len > maxlen)
108                         maxlen = len;
109         }
110
111         return maxlen > elasticity ? -EINVAL : 0;
112 }
113
114 static void br_multicast_free_pg(struct rcu_head *head)
115 {
116         struct net_bridge_port_group *p =
117                 container_of(head, struct net_bridge_port_group, rcu);
118
119         kfree(p);
120 }
121
122 static void br_multicast_free_group(struct rcu_head *head)
123 {
124         struct net_bridge_mdb_entry *mp =
125                 container_of(head, struct net_bridge_mdb_entry, rcu);
126
127         kfree(mp);
128 }
129
130 static void br_multicast_group_expired(unsigned long data)
131 {
132         struct net_bridge_mdb_entry *mp = (void *)data;
133         struct net_bridge *br = mp->br;
134         struct net_bridge_mdb_htable *mdb;
135
136         spin_lock(&br->multicast_lock);
137         if (!netif_running(br->dev) || timer_pending(&mp->timer))
138                 goto out;
139
140         if (!hlist_unhashed(&mp->mglist))
141                 hlist_del_init(&mp->mglist);
142
143         if (mp->ports)
144                 goto out;
145
146         mdb = br->mdb;
147         hlist_del_rcu(&mp->hlist[mdb->ver]);
148         mdb->size--;
149
150         del_timer(&mp->query_timer);
151         call_rcu_bh(&mp->rcu, br_multicast_free_group);
152
153 out:
154         spin_unlock(&br->multicast_lock);
155 }
156
157 static void br_multicast_del_pg(struct net_bridge *br,
158                                 struct net_bridge_port_group *pg)
159 {
160         struct net_bridge_mdb_htable *mdb = br->mdb;
161         struct net_bridge_mdb_entry *mp;
162         struct net_bridge_port_group *p;
163         struct net_bridge_port_group **pp;
164
165         mp = br_mdb_ip_get(mdb, pg->addr);
166         if (WARN_ON(!mp))
167                 return;
168
169         for (pp = &mp->ports; (p = *pp); pp = &p->next) {
170                 if (p != pg)
171                         continue;
172
173                 *pp = p->next;
174                 hlist_del_init(&p->mglist);
175                 del_timer(&p->timer);
176                 del_timer(&p->query_timer);
177                 call_rcu_bh(&p->rcu, br_multicast_free_pg);
178
179                 if (!mp->ports && hlist_unhashed(&mp->mglist) &&
180                     netif_running(br->dev))
181                         mod_timer(&mp->timer, jiffies);
182
183                 return;
184         }
185
186         WARN_ON(1);
187 }
188
189 static void br_multicast_port_group_expired(unsigned long data)
190 {
191         struct net_bridge_port_group *pg = (void *)data;
192         struct net_bridge *br = pg->port->br;
193
194         spin_lock(&br->multicast_lock);
195         if (!netif_running(br->dev) || timer_pending(&pg->timer) ||
196             hlist_unhashed(&pg->mglist))
197                 goto out;
198
199         br_multicast_del_pg(br, pg);
200
201 out:
202         spin_unlock(&br->multicast_lock);
203 }
204
205 static int br_mdb_rehash(struct net_bridge_mdb_htable **mdbp, int max,
206                          int elasticity)
207 {
208         struct net_bridge_mdb_htable *old = *mdbp;
209         struct net_bridge_mdb_htable *mdb;
210         int err;
211
212         mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC);
213         if (!mdb)
214                 return -ENOMEM;
215
216         mdb->max = max;
217         mdb->old = old;
218
219         mdb->mhash = kzalloc(max * sizeof(*mdb->mhash), GFP_ATOMIC);
220         if (!mdb->mhash) {
221                 kfree(mdb);
222                 return -ENOMEM;
223         }
224
225         mdb->size = old ? old->size : 0;
226         mdb->ver = old ? old->ver ^ 1 : 0;
227
228         if (!old || elasticity)
229                 get_random_bytes(&mdb->secret, sizeof(mdb->secret));
230         else
231                 mdb->secret = old->secret;
232
233         if (!old)
234                 goto out;
235
236         err = br_mdb_copy(mdb, old, elasticity);
237         if (err) {
238                 kfree(mdb->mhash);
239                 kfree(mdb);
240                 return err;
241         }
242
243         call_rcu_bh(&mdb->rcu, br_mdb_free);
244
245 out:
246         rcu_assign_pointer(*mdbp, mdb);
247
248         return 0;
249 }
250
251 static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
252                                                 __be32 group)
253 {
254         struct sk_buff *skb;
255         struct igmphdr *ih;
256         struct ethhdr *eth;
257         struct iphdr *iph;
258
259         skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
260                                                  sizeof(*ih) + 4);
261         if (!skb)
262                 goto out;
263
264         skb->protocol = htons(ETH_P_IP);
265
266         skb_reset_mac_header(skb);
267         eth = eth_hdr(skb);
268
269         memcpy(eth->h_source, br->dev->dev_addr, 6);
270         eth->h_dest[0] = 1;
271         eth->h_dest[1] = 0;
272         eth->h_dest[2] = 0x5e;
273         eth->h_dest[3] = 0;
274         eth->h_dest[4] = 0;
275         eth->h_dest[5] = 1;
276         eth->h_proto = htons(ETH_P_IP);
277         skb_put(skb, sizeof(*eth));
278
279         skb_set_network_header(skb, skb->len);
280         iph = ip_hdr(skb);
281
282         iph->version = 4;
283         iph->ihl = 6;
284         iph->tos = 0xc0;
285         iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4);
286         iph->id = 0;
287         iph->frag_off = htons(IP_DF);
288         iph->ttl = 1;
289         iph->protocol = IPPROTO_IGMP;
290         iph->saddr = 0;
291         iph->daddr = htonl(INADDR_ALLHOSTS_GROUP);
292         ((u8 *)&iph[1])[0] = IPOPT_RA;
293         ((u8 *)&iph[1])[1] = 4;
294         ((u8 *)&iph[1])[2] = 0;
295         ((u8 *)&iph[1])[3] = 0;
296         ip_send_check(iph);
297         skb_put(skb, 24);
298
299         skb_set_transport_header(skb, skb->len);
300         ih = igmp_hdr(skb);
301         ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
302         ih->code = (group ? br->multicast_last_member_interval :
303                             br->multicast_query_response_interval) /
304                    (HZ / IGMP_TIMER_SCALE);
305         ih->group = group;
306         ih->csum = 0;
307         ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
308         skb_put(skb, sizeof(*ih));
309
310         __skb_pull(skb, sizeof(*eth));
311
312 out:
313         return skb;
314 }
315
316 static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp)
317 {
318         struct net_bridge *br = mp->br;
319         struct sk_buff *skb;
320
321         skb = br_multicast_alloc_query(br, mp->addr);
322         if (!skb)
323                 goto timer;
324
325         netif_rx(skb);
326
327 timer:
328         if (++mp->queries_sent < br->multicast_last_member_count)
329                 mod_timer(&mp->query_timer,
330                           jiffies + br->multicast_last_member_interval);
331 }
332
333 static void br_multicast_group_query_expired(unsigned long data)
334 {
335         struct net_bridge_mdb_entry *mp = (void *)data;
336         struct net_bridge *br = mp->br;
337
338         spin_lock(&br->multicast_lock);
339         if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) ||
340             mp->queries_sent >= br->multicast_last_member_count)
341                 goto out;
342
343         br_multicast_send_group_query(mp);
344
345 out:
346         spin_unlock(&br->multicast_lock);
347 }
348
349 static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg)
350 {
351         struct net_bridge_port *port = pg->port;
352         struct net_bridge *br = port->br;
353         struct sk_buff *skb;
354
355         skb = br_multicast_alloc_query(br, pg->addr);
356         if (!skb)
357                 goto timer;
358
359         br_deliver(port, skb);
360
361 timer:
362         if (++pg->queries_sent < br->multicast_last_member_count)
363                 mod_timer(&pg->query_timer,
364                           jiffies + br->multicast_last_member_interval);
365 }
366
367 static void br_multicast_port_group_query_expired(unsigned long data)
368 {
369         struct net_bridge_port_group *pg = (void *)data;
370         struct net_bridge_port *port = pg->port;
371         struct net_bridge *br = port->br;
372
373         spin_lock(&br->multicast_lock);
374         if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) ||
375             pg->queries_sent >= br->multicast_last_member_count)
376                 goto out;
377
378         br_multicast_send_port_group_query(pg);
379
380 out:
381         spin_unlock(&br->multicast_lock);
382 }
383
384 static struct net_bridge_mdb_entry *br_multicast_get_group(
385         struct net_bridge *br, struct net_bridge_port *port, __be32 group,
386         int hash)
387 {
388         struct net_bridge_mdb_htable *mdb = br->mdb;
389         struct net_bridge_mdb_entry *mp;
390         struct hlist_node *p;
391         unsigned count = 0;
392         unsigned max;
393         int elasticity;
394         int err;
395
396         hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
397                 count++;
398                 if (unlikely(group == mp->addr)) {
399                         return mp;
400                 }
401         }
402
403         elasticity = 0;
404         max = mdb->max;
405
406         if (unlikely(count > br->hash_elasticity && count)) {
407                 if (net_ratelimit())
408                         printk(KERN_INFO "%s: Multicast hash table "
409                                "chain limit reached: %s\n",
410                                br->dev->name, port ? port->dev->name :
411                                                      br->dev->name);
412
413                 elasticity = br->hash_elasticity;
414         }
415
416         if (mdb->size >= max) {
417                 max *= 2;
418                 if (unlikely(max >= br->hash_max)) {
419                         printk(KERN_WARNING "%s: Multicast hash table maximum "
420                                "reached, disabling snooping: %s, %d\n",
421                                br->dev->name, port ? port->dev->name :
422                                                      br->dev->name,
423                                max);
424                         err = -E2BIG;
425 disable:
426                         br->multicast_disabled = 1;
427                         goto err;
428                 }
429         }
430
431         if (max > mdb->max || elasticity) {
432                 if (mdb->old) {
433                         if (net_ratelimit())
434                                 printk(KERN_INFO "%s: Multicast hash table "
435                                        "on fire: %s\n",
436                                        br->dev->name, port ? port->dev->name :
437                                                              br->dev->name);
438                         err = -EEXIST;
439                         goto err;
440                 }
441
442                 err = br_mdb_rehash(&br->mdb, max, elasticity);
443                 if (err) {
444                         printk(KERN_WARNING "%s: Cannot rehash multicast "
445                                "hash table, disabling snooping: "
446                                "%s, %d, %d\n",
447                                br->dev->name, port ? port->dev->name :
448                                                      br->dev->name,
449                                mdb->size, err);
450                         goto disable;
451                 }
452
453                 err = -EAGAIN;
454                 goto err;
455         }
456
457         return NULL;
458
459 err:
460         mp = ERR_PTR(err);
461         return mp;
462 }
463
464 static struct net_bridge_mdb_entry *br_multicast_new_group(
465         struct net_bridge *br, struct net_bridge_port *port, __be32 group)
466 {
467         struct net_bridge_mdb_htable *mdb = br->mdb;
468         struct net_bridge_mdb_entry *mp;
469         int hash;
470
471         if (!mdb) {
472                 if (br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0))
473                         return NULL;
474                 goto rehash;
475         }
476
477         hash = br_ip_hash(mdb, group);
478         mp = br_multicast_get_group(br, port, group, hash);
479         switch (PTR_ERR(mp)) {
480         case 0:
481                 break;
482
483         case -EAGAIN:
484 rehash:
485                 mdb = br->mdb;
486                 hash = br_ip_hash(mdb, group);
487                 break;
488
489         default:
490                 goto out;
491         }
492
493         mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
494         if (unlikely(!mp))
495                 goto out;
496
497         mp->br = br;
498         mp->addr = group;
499         setup_timer(&mp->timer, br_multicast_group_expired,
500                     (unsigned long)mp);
501         setup_timer(&mp->query_timer, br_multicast_group_query_expired,
502                     (unsigned long)mp);
503
504         hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
505         mdb->size++;
506
507 out:
508         return mp;
509 }
510
511 static int br_multicast_add_group(struct net_bridge *br,
512                                   struct net_bridge_port *port, __be32 group)
513 {
514         struct net_bridge_mdb_entry *mp;
515         struct net_bridge_port_group *p;
516         struct net_bridge_port_group **pp;
517         unsigned long now = jiffies;
518         int err;
519
520         if (ipv4_is_local_multicast(group))
521                 return 0;
522
523         spin_lock(&br->multicast_lock);
524         if (!netif_running(br->dev) ||
525             (port && port->state == BR_STATE_DISABLED))
526                 goto out;
527
528         mp = br_multicast_new_group(br, port, group);
529         err = PTR_ERR(mp);
530         if (unlikely(IS_ERR(mp) || !mp))
531                 goto err;
532
533         if (!port) {
534                 hlist_add_head(&mp->mglist, &br->mglist);
535                 mod_timer(&mp->timer, now + br->multicast_membership_interval);
536                 goto out;
537         }
538
539         for (pp = &mp->ports; (p = *pp); pp = &p->next) {
540                 if (p->port == port)
541                         goto found;
542                 if ((unsigned long)p->port < (unsigned long)port)
543                         break;
544         }
545
546         p = kzalloc(sizeof(*p), GFP_ATOMIC);
547         err = -ENOMEM;
548         if (unlikely(!p))
549                 goto err;
550
551         p->addr = group;
552         p->port = port;
553         p->next = *pp;
554         hlist_add_head(&p->mglist, &port->mglist);
555         setup_timer(&p->timer, br_multicast_port_group_expired,
556                     (unsigned long)p);
557         setup_timer(&p->query_timer, br_multicast_port_group_query_expired,
558                     (unsigned long)p);
559
560         rcu_assign_pointer(*pp, p);
561
562 found:
563         mod_timer(&p->timer, now + br->multicast_membership_interval);
564 out:
565         err = 0;
566
567 err:
568         spin_unlock(&br->multicast_lock);
569         return err;
570 }
571
572 static void br_multicast_router_expired(unsigned long data)
573 {
574         struct net_bridge_port *port = (void *)data;
575         struct net_bridge *br = port->br;
576
577         spin_lock(&br->multicast_lock);
578         if (port->multicast_router != 1 ||
579             timer_pending(&port->multicast_router_timer) ||
580             hlist_unhashed(&port->rlist))
581                 goto out;
582
583         hlist_del_init_rcu(&port->rlist);
584
585 out:
586         spin_unlock(&br->multicast_lock);
587 }
588
/* Timer callback with an intentionally empty body; @data is unused. */
static void br_multicast_local_router_expired(unsigned long data)
{
}
592
593 static void br_multicast_send_query(struct net_bridge *br,
594                                     struct net_bridge_port *port, u32 sent)
595 {
596         unsigned long time;
597         struct sk_buff *skb;
598
599         if (!netif_running(br->dev) || br->multicast_disabled ||
600             timer_pending(&br->multicast_querier_timer))
601                 return;
602
603         skb = br_multicast_alloc_query(br, 0);
604         if (!skb)
605                 goto timer;
606
607         if (port) {
608                 __skb_push(skb, sizeof(struct ethhdr));
609                 skb->dev = port->dev;
610                 NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
611                         dev_queue_xmit);
612         } else
613                 netif_rx(skb);
614
615 timer:
616         time = jiffies;
617         time += sent < br->multicast_startup_query_count ?
618                 br->multicast_startup_query_interval :
619                 br->multicast_query_interval;
620         mod_timer(port ? &port->multicast_query_timer :
621                          &br->multicast_query_timer, time);
622 }
623
624 static void br_multicast_port_query_expired(unsigned long data)
625 {
626         struct net_bridge_port *port = (void *)data;
627         struct net_bridge *br = port->br;
628
629         spin_lock(&br->multicast_lock);
630         if (port && (port->state == BR_STATE_DISABLED ||
631                      port->state == BR_STATE_BLOCKING))
632                 goto out;
633
634         if (port->multicast_startup_queries_sent <
635             br->multicast_startup_query_count)
636                 port->multicast_startup_queries_sent++;
637
638         br_multicast_send_query(port->br, port,
639                                 port->multicast_startup_queries_sent);
640
641 out:
642         spin_unlock(&br->multicast_lock);
643 }
644
645 void br_multicast_add_port(struct net_bridge_port *port)
646 {
647         port->multicast_router = 1;
648
649         setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
650                     (unsigned long)port);
651         setup_timer(&port->multicast_query_timer,
652                     br_multicast_port_query_expired, (unsigned long)port);
653 }
654
655 void br_multicast_del_port(struct net_bridge_port *port)
656 {
657         del_timer_sync(&port->multicast_router_timer);
658 }
659
660 static void __br_multicast_enable_port(struct net_bridge_port *port)
661 {
662         port->multicast_startup_queries_sent = 0;
663
664         if (try_to_del_timer_sync(&port->multicast_query_timer) >= 0 ||
665             del_timer(&port->multicast_query_timer))
666                 mod_timer(&port->multicast_query_timer, jiffies);
667 }
668
669 void br_multicast_enable_port(struct net_bridge_port *port)
670 {
671         struct net_bridge *br = port->br;
672
673         spin_lock(&br->multicast_lock);
674         if (br->multicast_disabled || !netif_running(br->dev))
675                 goto out;
676
677         __br_multicast_enable_port(port);
678
679 out:
680         spin_unlock(&br->multicast_lock);
681 }
682
683 void br_multicast_disable_port(struct net_bridge_port *port)
684 {
685         struct net_bridge *br = port->br;
686         struct net_bridge_port_group *pg;
687         struct hlist_node *p, *n;
688
689         spin_lock(&br->multicast_lock);
690         hlist_for_each_entry_safe(pg, p, n, &port->mglist, mglist)
691                 br_multicast_del_pg(br, pg);
692
693         if (!hlist_unhashed(&port->rlist))
694                 hlist_del_init_rcu(&port->rlist);
695         del_timer(&port->multicast_router_timer);
696         del_timer(&port->multicast_query_timer);
697         spin_unlock(&br->multicast_lock);
698 }
699
700 static int br_multicast_igmp3_report(struct net_bridge *br,
701                                      struct net_bridge_port *port,
702                                      struct sk_buff *skb)
703 {
704         struct igmpv3_report *ih;
705         struct igmpv3_grec *grec;
706         int i;
707         int len;
708         int num;
709         int type;
710         int err = 0;
711         __be32 group;
712
713         if (!pskb_may_pull(skb, sizeof(*ih)))
714                 return -EINVAL;
715
716         ih = igmpv3_report_hdr(skb);
717         num = ntohs(ih->ngrec);
718         len = sizeof(*ih);
719
720         for (i = 0; i < num; i++) {
721                 len += sizeof(*grec);
722                 if (!pskb_may_pull(skb, len))
723                         return -EINVAL;
724
725                 grec = (void *)(skb->data + len);
726                 group = grec->grec_mca;
727                 type = grec->grec_type;
728
729                 len += grec->grec_nsrcs * 4;
730                 if (!pskb_may_pull(skb, len))
731                         return -EINVAL;
732
733                 /* We treat this as an IGMPv2 report for now. */
734                 switch (type) {
735                 case IGMPV3_MODE_IS_INCLUDE:
736                 case IGMPV3_MODE_IS_EXCLUDE:
737                 case IGMPV3_CHANGE_TO_INCLUDE:
738                 case IGMPV3_CHANGE_TO_EXCLUDE:
739                 case IGMPV3_ALLOW_NEW_SOURCES:
740                 case IGMPV3_BLOCK_OLD_SOURCES:
741                         break;
742
743                 default:
744                         continue;
745                 }
746
747                 err = br_multicast_add_group(br, port, group);
748                 if (err)
749                         break;
750         }
751
752         return err;
753 }
754
755 static void br_multicast_add_router(struct net_bridge *br,
756                                     struct net_bridge_port *port)
757 {
758         struct hlist_node *p;
759         struct hlist_node **h;
760
761         for (h = &br->router_list.first;
762              (p = *h) &&
763              (unsigned long)container_of(p, struct net_bridge_port, rlist) >
764              (unsigned long)port;
765              h = &p->next)
766                 ;
767
768         port->rlist.pprev = h;
769         port->rlist.next = p;
770         rcu_assign_pointer(*h, &port->rlist);
771         if (p)
772                 p->pprev = &port->rlist.next;
773 }
774
775 static void br_multicast_mark_router(struct net_bridge *br,
776                                      struct net_bridge_port *port)
777 {
778         unsigned long now = jiffies;
779
780         if (!port) {
781                 if (br->multicast_router == 1)
782                         mod_timer(&br->multicast_router_timer,
783                                   now + br->multicast_querier_interval);
784                 return;
785         }
786
787         if (port->multicast_router != 1)
788                 return;
789
790         if (!hlist_unhashed(&port->rlist))
791                 goto timer;
792
793         br_multicast_add_router(br, port);
794
795 timer:
796         mod_timer(&port->multicast_router_timer,
797                   now + br->multicast_querier_interval);
798 }
799
800 static void br_multicast_query_received(struct net_bridge *br,
801                                         struct net_bridge_port *port,
802                                         __be32 saddr)
803 {
804         if (saddr)
805                 mod_timer(&br->multicast_querier_timer,
806                           jiffies + br->multicast_querier_interval);
807         else if (timer_pending(&br->multicast_querier_timer))
808                 return;
809
810         br_multicast_mark_router(br, port);
811 }
812
813 static int br_multicast_query(struct net_bridge *br,
814                               struct net_bridge_port *port,
815                               struct sk_buff *skb)
816 {
817         struct iphdr *iph = ip_hdr(skb);
818         struct igmphdr *ih = igmp_hdr(skb);
819         struct net_bridge_mdb_entry *mp;
820         struct igmpv3_query *ih3;
821         struct net_bridge_port_group *p;
822         struct net_bridge_port_group **pp;
823         unsigned long max_delay;
824         unsigned long now = jiffies;
825         __be32 group;
826
827         spin_lock(&br->multicast_lock);
828         if (!netif_running(br->dev) ||
829             (port && port->state == BR_STATE_DISABLED))
830                 goto out;
831
832         br_multicast_query_received(br, port, iph->saddr);
833
834         group = ih->group;
835
836         if (skb->len == sizeof(*ih)) {
837                 max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
838
839                 if (!max_delay) {
840                         max_delay = 10 * HZ;
841                         group = 0;
842                 }
843         } else {
844                 if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
845                         return -EINVAL;
846
847                 ih3 = igmpv3_query_hdr(skb);
848                 if (ih3->nsrcs)
849                         return 0;
850
851                 max_delay = ih3->code ? 1 :
852                             IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE);
853         }
854
855         if (!group)
856                 goto out;
857
858         mp = br_mdb_ip_get(br->mdb, group);
859         if (!mp)
860                 goto out;
861
862         max_delay *= br->multicast_last_member_count;
863
864         if (!hlist_unhashed(&mp->mglist) &&
865             (timer_pending(&mp->timer) ?
866              time_after(mp->timer.expires, now + max_delay) :
867              try_to_del_timer_sync(&mp->timer) >= 0))
868                 mod_timer(&mp->timer, now + max_delay);
869
870         for (pp = &mp->ports; (p = *pp); pp = &p->next) {
871                 if (timer_pending(&p->timer) ?
872                     time_after(p->timer.expires, now + max_delay) :
873                     try_to_del_timer_sync(&p->timer) >= 0)
874                         mod_timer(&mp->timer, now + max_delay);
875         }
876
877 out:
878         spin_unlock(&br->multicast_lock);
879         return 0;
880 }
881
882 static void br_multicast_leave_group(struct net_bridge *br,
883                                      struct net_bridge_port *port,
884                                      __be32 group)
885 {
886         struct net_bridge_mdb_htable *mdb;
887         struct net_bridge_mdb_entry *mp;
888         struct net_bridge_port_group *p;
889         unsigned long now;
890         unsigned long time;
891
892         if (ipv4_is_local_multicast(group))
893                 return;
894
895         spin_lock(&br->multicast_lock);
896         if (!netif_running(br->dev) ||
897             (port && port->state == BR_STATE_DISABLED) ||
898             timer_pending(&br->multicast_querier_timer))
899                 goto out;
900
901         mdb = br->mdb;
902         mp = br_mdb_ip_get(mdb, group);
903         if (!mp)
904                 goto out;
905
906         now = jiffies;
907         time = now + br->multicast_last_member_count *
908                      br->multicast_last_member_interval;
909
910         if (!port) {
911                 if (!hlist_unhashed(&mp->mglist) &&
912                     (timer_pending(&mp->timer) ?
913                      time_after(mp->timer.expires, time) :
914                      try_to_del_timer_sync(&mp->timer) >= 0)) {
915                         mod_timer(&mp->timer, time);
916
917                         mp->queries_sent = 0;
918                         mod_timer(&mp->query_timer, now);
919                 }
920
921                 goto out;
922         }
923
924         for (p = mp->ports; p; p = p->next) {
925                 if (p->port != port)
926                         continue;
927
928                 if (!hlist_unhashed(&p->mglist) &&
929                     (timer_pending(&p->timer) ?
930                      time_after(p->timer.expires, time) :
931                      try_to_del_timer_sync(&p->timer) >= 0)) {
932                         mod_timer(&p->timer, time);
933
934                         p->queries_sent = 0;
935                         mod_timer(&p->query_timer, now);
936                 }
937
938                 break;
939         }
940
941 out:
942         spin_unlock(&br->multicast_lock);
943 }
944
945 static int br_multicast_ipv4_rcv(struct net_bridge *br,
946                                  struct net_bridge_port *port,
947                                  struct sk_buff *skb)
948 {
949         struct sk_buff *skb2 = skb;
950         struct iphdr *iph;
951         struct igmphdr *ih;
952         unsigned len;
953         unsigned offset;
954         int err;
955
956         BR_INPUT_SKB_CB(skb)->igmp = 0;
957         BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
958
959         /* We treat OOM as packet loss for now. */
960         if (!pskb_may_pull(skb, sizeof(*iph)))
961                 return -EINVAL;
962
963         iph = ip_hdr(skb);
964
965         if (iph->ihl < 5 || iph->version != 4)
966                 return -EINVAL;
967
968         if (!pskb_may_pull(skb, ip_hdrlen(skb)))
969                 return -EINVAL;
970
971         iph = ip_hdr(skb);
972
973         if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
974                 return -EINVAL;
975
976         if (iph->protocol != IPPROTO_IGMP)
977                 return 0;
978
979         len = ntohs(iph->tot_len);
980         if (skb->len < len || len < ip_hdrlen(skb))
981                 return -EINVAL;
982
983         if (skb->len > len) {
984                 skb2 = skb_clone(skb, GFP_ATOMIC);
985                 if (!skb2)
986                         return -ENOMEM;
987
988                 err = pskb_trim_rcsum(skb2, len);
989                 if (err)
990                         return err;
991         }
992
993         len -= ip_hdrlen(skb2);
994         offset = skb_network_offset(skb2) + ip_hdrlen(skb2);
995         __skb_pull(skb2, offset);
996         skb_reset_transport_header(skb2);
997
998         err = -EINVAL;
999         if (!pskb_may_pull(skb2, sizeof(*ih)))
1000                 goto out;
1001
1002         iph = ip_hdr(skb2);
1003
1004         switch (skb2->ip_summed) {
1005         case CHECKSUM_COMPLETE:
1006                 if (!csum_fold(skb2->csum))
1007                         break;
1008                 /* fall through */
1009         case CHECKSUM_NONE:
1010                 skb2->csum = 0;
1011                 if (skb_checksum_complete(skb2))
1012                         return -EINVAL;
1013         }
1014
1015         err = 0;
1016
1017         BR_INPUT_SKB_CB(skb)->igmp = 1;
1018         ih = igmp_hdr(skb2);
1019
1020         switch (ih->type) {
1021         case IGMP_HOST_MEMBERSHIP_REPORT:
1022         case IGMPV2_HOST_MEMBERSHIP_REPORT:
1023                 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
1024                 err = br_multicast_add_group(br, port, ih->group);
1025                 break;
1026         case IGMPV3_HOST_MEMBERSHIP_REPORT:
1027                 err = br_multicast_igmp3_report(br, port, skb2);
1028                 break;
1029         case IGMP_HOST_MEMBERSHIP_QUERY:
1030                 err = br_multicast_query(br, port, skb2);
1031                 break;
1032         case IGMP_HOST_LEAVE_MESSAGE:
1033                 br_multicast_leave_group(br, port, ih->group);
1034                 break;
1035         }
1036
1037 out:
1038         __skb_push(skb2, offset);
1039         if (skb2 != skb)
1040                 kfree_skb(skb2);
1041         return err;
1042 }
1043
1044 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
1045                      struct sk_buff *skb)
1046 {
1047         if (br->multicast_disabled)
1048                 return 0;
1049
1050         switch (skb->protocol) {
1051         case htons(ETH_P_IP):
1052                 return br_multicast_ipv4_rcv(br, port, skb);
1053         }
1054
1055         return 0;
1056 }
1057
1058 static void br_multicast_query_expired(unsigned long data)
1059 {
1060         struct net_bridge *br = (void *)data;
1061
1062         spin_lock(&br->multicast_lock);
1063         if (br->multicast_startup_queries_sent <
1064             br->multicast_startup_query_count)
1065                 br->multicast_startup_queries_sent++;
1066
1067         br_multicast_send_query(br, NULL, br->multicast_startup_queries_sent);
1068
1069         spin_unlock(&br->multicast_lock);
1070 }
1071
1072 void br_multicast_init(struct net_bridge *br)
1073 {
1074         br->hash_elasticity = 4;
1075         br->hash_max = 512;
1076
1077         br->multicast_router = 1;
1078         br->multicast_last_member_count = 2;
1079         br->multicast_startup_query_count = 2;
1080
1081         br->multicast_last_member_interval = HZ;
1082         br->multicast_query_response_interval = 10 * HZ;
1083         br->multicast_startup_query_interval = 125 * HZ / 4;
1084         br->multicast_query_interval = 125 * HZ;
1085         br->multicast_querier_interval = 255 * HZ;
1086         br->multicast_membership_interval = 260 * HZ;
1087
1088         spin_lock_init(&br->multicast_lock);
1089         setup_timer(&br->multicast_router_timer,
1090                     br_multicast_local_router_expired, 0);
1091         setup_timer(&br->multicast_querier_timer,
1092                     br_multicast_local_router_expired, 0);
1093         setup_timer(&br->multicast_query_timer, br_multicast_query_expired,
1094                     (unsigned long)br);
1095 }
1096
1097 void br_multicast_open(struct net_bridge *br)
1098 {
1099         br->multicast_startup_queries_sent = 0;
1100
1101         if (br->multicast_disabled)
1102                 return;
1103
1104         mod_timer(&br->multicast_query_timer, jiffies);
1105 }
1106
1107 void br_multicast_stop(struct net_bridge *br)
1108 {
1109         struct net_bridge_mdb_htable *mdb;
1110         struct net_bridge_mdb_entry *mp;
1111         struct hlist_node *p, *n;
1112         u32 ver;
1113         int i;
1114
1115         del_timer_sync(&br->multicast_router_timer);
1116         del_timer_sync(&br->multicast_querier_timer);
1117         del_timer_sync(&br->multicast_query_timer);
1118
1119         spin_lock_bh(&br->multicast_lock);
1120         mdb = br->mdb;
1121         if (!mdb)
1122                 goto out;
1123
1124         br->mdb = NULL;
1125
1126         ver = mdb->ver;
1127         for (i = 0; i < mdb->max; i++) {
1128                 hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i],
1129                                           hlist[ver]) {
1130                         del_timer(&mp->timer);
1131                         del_timer(&mp->query_timer);
1132                         call_rcu_bh(&mp->rcu, br_multicast_free_group);
1133                 }
1134         }
1135
1136         if (mdb->old) {
1137                 spin_unlock_bh(&br->multicast_lock);
1138                 synchronize_rcu_bh();
1139                 spin_lock_bh(&br->multicast_lock);
1140                 WARN_ON(mdb->old);
1141         }
1142
1143         mdb->old = mdb;
1144         call_rcu_bh(&mdb->rcu, br_mdb_free);
1145
1146 out:
1147         spin_unlock_bh(&br->multicast_lock);
1148 }
1149
1150 int br_multicast_set_router(struct net_bridge *br, unsigned long val)
1151 {
1152         int err = -ENOENT;
1153
1154         spin_lock_bh(&br->multicast_lock);
1155         if (!netif_running(br->dev))
1156                 goto unlock;
1157
1158         switch (val) {
1159         case 0:
1160         case 2:
1161                 del_timer(&br->multicast_router_timer);
1162                 /* fall through */
1163         case 1:
1164                 br->multicast_router = val;
1165                 err = 0;
1166                 break;
1167
1168         default:
1169                 err = -EINVAL;
1170                 break;
1171         }
1172
1173 unlock:
1174         spin_unlock_bh(&br->multicast_lock);
1175
1176         return err;
1177 }
1178
1179 int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
1180 {
1181         struct net_bridge *br = p->br;
1182         int err = -ENOENT;
1183
1184         spin_lock(&br->multicast_lock);
1185         if (!netif_running(br->dev) || p->state == BR_STATE_DISABLED)
1186                 goto unlock;
1187
1188         switch (val) {
1189         case 0:
1190         case 1:
1191         case 2:
1192                 p->multicast_router = val;
1193                 err = 0;
1194
1195                 if (val < 2 && !hlist_unhashed(&p->rlist))
1196                         hlist_del_init_rcu(&p->rlist);
1197
1198                 if (val == 1)
1199                         break;
1200
1201                 del_timer(&p->multicast_router_timer);
1202
1203                 if (val == 0)
1204                         break;
1205
1206                 br_multicast_add_router(br, p);
1207                 break;
1208
1209         default:
1210                 err = -EINVAL;
1211                 break;
1212         }
1213
1214 unlock:
1215         spin_unlock(&br->multicast_lock);
1216
1217         return err;
1218 }
1219
1220 int br_multicast_toggle(struct net_bridge *br, unsigned long val)
1221 {
1222         struct net_bridge_port *port;
1223         int err = -ENOENT;
1224
1225         spin_lock(&br->multicast_lock);
1226         if (!netif_running(br->dev))
1227                 goto unlock;
1228
1229         err = 0;
1230         if (br->multicast_disabled == !val)
1231                 goto unlock;
1232
1233         br->multicast_disabled = !val;
1234         if (br->multicast_disabled)
1235                 goto unlock;
1236
1237         if (br->mdb) {
1238                 if (br->mdb->old) {
1239                         err = -EEXIST;
1240 rollback:
1241                         br->multicast_disabled = !!val;
1242                         goto unlock;
1243                 }
1244
1245                 err = br_mdb_rehash(&br->mdb, br->mdb->max,
1246                                     br->hash_elasticity);
1247                 if (err)
1248                         goto rollback;
1249         }
1250
1251         br_multicast_open(br);
1252         list_for_each_entry(port, &br->port_list, list) {
1253                 if (port->state == BR_STATE_DISABLED ||
1254                     port->state == BR_STATE_BLOCKING)
1255                         continue;
1256
1257                 __br_multicast_enable_port(port);
1258         }
1259
1260 unlock:
1261         spin_unlock(&br->multicast_lock);
1262
1263         return err;
1264 }
1265
1266 int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
1267 {
1268         int err = -ENOENT;
1269         u32 old;
1270
1271         spin_lock(&br->multicast_lock);
1272         if (!netif_running(br->dev))
1273                 goto unlock;
1274
1275         err = -EINVAL;
1276         if (!is_power_of_2(val))
1277                 goto unlock;
1278         if (br->mdb && val < br->mdb->size)
1279                 goto unlock;
1280
1281         err = 0;
1282
1283         old = br->hash_max;
1284         br->hash_max = val;
1285
1286         if (br->mdb) {
1287                 if (br->mdb->old) {
1288                         err = -EEXIST;
1289 rollback:
1290                         br->hash_max = old;
1291                         goto unlock;
1292                 }
1293
1294                 err = br_mdb_rehash(&br->mdb, br->hash_max,
1295                                     br->hash_elasticity);
1296                 if (err)
1297                         goto rollback;
1298         }
1299
1300 unlock:
1301         spin_unlock(&br->multicast_lock);
1302
1303         return err;
1304 }