Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6.git] / net / packet / af_packet.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              PACKET - implements raw packet sockets.
7  *
8  * Authors:     Ross Biro
9  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
11  *
12  * Fixes:
13  *              Alan Cox        :       verify_area() now used correctly
14  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
15  *              Alan Cox        :       tidied skbuff lists.
16  *              Alan Cox        :       Now uses generic datagram routines I
17  *                                      added. Also fixed the peek/read crash
18  *                                      from all old Linux datagram code.
19  *              Alan Cox        :       Uses the improved datagram code.
20  *              Alan Cox        :       Added NULL's for socket options.
21  *              Alan Cox        :       Re-commented the code.
22  *              Alan Cox        :       Use new kernel side addressing
23  *              Rob Janssen     :       Correct MTU usage.
24  *              Dave Platt      :       Counter leaks caused by incorrect
25  *                                      interrupt locking and some slightly
26  *                                      dubious gcc output. Can you read
27  *                                      compiler: it said _VOLATILE_
28  *      Richard Kooijman        :       Timestamp fixes.
29  *              Alan Cox        :       New buffers. Use sk->mac.raw.
30  *              Alan Cox        :       sendmsg/recvmsg support.
31  *              Alan Cox        :       Protocol setting support
32  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
33  *      Cyrus Durgin            :       Fixed kerneld for kmod.
34  *      Michal Ostrowski        :       Module initialization cleanup.
35  *         Ulises Alonso        :       Frame number limit removal and
36  *                                      packet_set_ring memory leak.
37  *              Eric Biederman  :       Allow for > 8 byte hardware addresses.
38  *                                      The convention is that longer addresses
39  *                                      will simply extend the hardware address
40  *                                      byte arrays at the end of sockaddr_ll
41  *                                      and packet_mreq.
42  *              Johann Baudy    :       Added TX RING.
43  *
44  *              This program is free software; you can redistribute it and/or
45  *              modify it under the terms of the GNU General Public License
46  *              as published by the Free Software Foundation; either version
47  *              2 of the License, or (at your option) any later version.
48  *
49  */
50
51 #include <linux/types.h>
52 #include <linux/mm.h>
53 #include <linux/capability.h>
54 #include <linux/fcntl.h>
55 #include <linux/socket.h>
56 #include <linux/in.h>
57 #include <linux/inet.h>
58 #include <linux/netdevice.h>
59 #include <linux/if_packet.h>
60 #include <linux/wireless.h>
61 #include <linux/kernel.h>
62 #include <linux/kmod.h>
63 #include <net/net_namespace.h>
64 #include <net/ip.h>
65 #include <net/protocol.h>
66 #include <linux/skbuff.h>
67 #include <net/sock.h>
68 #include <linux/errno.h>
69 #include <linux/timer.h>
70 #include <asm/system.h>
71 #include <asm/uaccess.h>
72 #include <asm/ioctls.h>
73 #include <asm/page.h>
74 #include <asm/cacheflush.h>
75 #include <asm/io.h>
76 #include <linux/proc_fs.h>
77 #include <linux/seq_file.h>
78 #include <linux/poll.h>
79 #include <linux/module.h>
80 #include <linux/init.h>
81 #include <linux/mutex.h>
82
83 #ifdef CONFIG_INET
84 #include <net/inet_common.h>
85 #endif
86
87 /*
88    Assumptions:
89    - if device has no dev->hard_header routine, it adds and removes ll header
90      inside itself. In this case ll header is invisible outside of device,
91      but higher levels still should reserve dev->hard_header_len.
92      Some devices are clever enough to reallocate the skb when the header
93      does not fit in the reserved space (tunnel); others are silly
94      (PPP).
95    - packet socket receives packets with pulled ll header,
96      so that SOCK_RAW should push it back.
97
98 On receive:
99 -----------
100
101 Incoming, dev->hard_header!=NULL
102    mac_header -> ll header
103    data       -> data
104
105 Outgoing, dev->hard_header!=NULL
106    mac_header -> ll header
107    data       -> ll header
108
109 Incoming, dev->hard_header==NULL
110    mac_header -> UNKNOWN position. It is very likely that it points to the
111                  ll header.  PPP does this, which is wrong because it
112                  introduces asymmetry between the rx and tx paths.
113    data       -> data
114
115 Outgoing, dev->hard_header==NULL
116    mac_header -> data. ll header is still not built!
117    data       -> data
118
119 Resume
120   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
121
122
123 On transmit:
124 ------------
125
126 dev->hard_header != NULL
127    mac_header -> ll header
128    data       -> ll header
129
130 dev->hard_header == NULL (ll header is added by device, we cannot control it)
131    mac_header -> data
132    data       -> data
133
134    We should set nh.raw on output to the correct position;
135    the packet classifier depends on it.
136  */
137
138 /* Private packet socket structures. */
139
140 struct packet_mclist {
141         struct packet_mclist    *next;
142         int                     ifindex;
143         int                     count;
144         unsigned short          type;
145         unsigned short          alen;
146         unsigned char           addr[MAX_ADDR_LEN];
147 };
148 /* identical to struct packet_mreq except it has
149  * a longer address field.
150  */
151 struct packet_mreq_max {
152         int             mr_ifindex;
153         unsigned short  mr_type;
154         unsigned short  mr_alen;
155         unsigned char   mr_address[MAX_ADDR_LEN];
156 };
157
158 #ifdef CONFIG_PACKET_MMAP
159 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
160                 int closing, int tx_ring);
161
162 struct packet_ring_buffer {
163         char                    **pg_vec;
164         unsigned int            head;
165         unsigned int            frames_per_block;
166         unsigned int            frame_size;
167         unsigned int            frame_max;
168
169         unsigned int            pg_vec_order;
170         unsigned int            pg_vec_pages;
171         unsigned int            pg_vec_len;
172
173         atomic_t                pending;
174 };
175
176 struct packet_sock;
177 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
178 #endif
179
180 static void packet_flush_mclist(struct sock *sk);
181
182 struct packet_sock {
183         /* struct sock has to be the first member of packet_sock */
184         struct sock             sk;
185         struct tpacket_stats    stats;
186 #ifdef CONFIG_PACKET_MMAP
187         struct packet_ring_buffer       rx_ring;
188         struct packet_ring_buffer       tx_ring;
189         int                     copy_thresh;
190 #endif
191         struct packet_type      prot_hook;
192         spinlock_t              bind_lock;
193         struct mutex            pg_vec_lock;
194         unsigned int            running:1,      /* prot_hook is attached*/
195                                 auxdata:1,
196                                 origdev:1;
197         int                     ifindex;        /* bound device         */
198         __be16                  num;
199         struct packet_mclist    *mclist;
200 #ifdef CONFIG_PACKET_MMAP
201         atomic_t                mapped;
202         enum tpacket_versions   tp_version;
203         unsigned int            tp_hdrlen;
204         unsigned int            tp_reserve;
205         unsigned int            tp_loss:1;
206 #endif
207 };
208
209 struct packet_skb_cb {
210         unsigned int origlen;
211         union {
212                 struct sockaddr_pkt pkt;
213                 struct sockaddr_ll ll;
214         } sa;
215 };
216
217 #define PACKET_SKB_CB(__skb)    ((struct packet_skb_cb *)((__skb)->cb))
218
219 #ifdef CONFIG_PACKET_MMAP
220
221 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
222 {
223         union {
224                 struct tpacket_hdr *h1;
225                 struct tpacket2_hdr *h2;
226                 void *raw;
227         } h;
228
229         h.raw = frame;
230         switch (po->tp_version) {
231         case TPACKET_V1:
232                 h.h1->tp_status = status;
233                 flush_dcache_page(virt_to_page(&h.h1->tp_status));
234                 break;
235         case TPACKET_V2:
236                 h.h2->tp_status = status;
237                 flush_dcache_page(virt_to_page(&h.h2->tp_status));
238                 break;
239         default:
240                 pr_err("TPACKET version not supported\n");
241                 BUG();
242         }
243
244         smp_wmb();
245 }
246
247 static int __packet_get_status(struct packet_sock *po, void *frame)
248 {
249         union {
250                 struct tpacket_hdr *h1;
251                 struct tpacket2_hdr *h2;
252                 void *raw;
253         } h;
254
255         smp_rmb();
256
257         h.raw = frame;
258         switch (po->tp_version) {
259         case TPACKET_V1:
260                 flush_dcache_page(virt_to_page(&h.h1->tp_status));
261                 return h.h1->tp_status;
262         case TPACKET_V2:
263                 flush_dcache_page(virt_to_page(&h.h2->tp_status));
264                 return h.h2->tp_status;
265         default:
266                 pr_err("TPACKET version not supported\n");
267                 BUG();
268                 return 0;
269         }
270 }
271
272 static void *packet_lookup_frame(struct packet_sock *po,
273                 struct packet_ring_buffer *rb,
274                 unsigned int position,
275                 int status)
276 {
277         unsigned int pg_vec_pos, frame_offset;
278         union {
279                 struct tpacket_hdr *h1;
280                 struct tpacket2_hdr *h2;
281                 void *raw;
282         } h;
283
284         pg_vec_pos = position / rb->frames_per_block;
285         frame_offset = position % rb->frames_per_block;
286
287         h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
288
289         if (status != __packet_get_status(po, h.raw))
290                 return NULL;
291
292         return h.raw;
293 }
294
295 static inline void *packet_current_frame(struct packet_sock *po,
296                 struct packet_ring_buffer *rb,
297                 int status)
298 {
299         return packet_lookup_frame(po, rb, rb->head, status);
300 }
301
302 static inline void *packet_previous_frame(struct packet_sock *po,
303                 struct packet_ring_buffer *rb,
304                 int status)
305 {
306         unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
307         return packet_lookup_frame(po, rb, previous, status);
308 }
309
310 static inline void packet_increment_head(struct packet_ring_buffer *buff)
311 {
312         buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
313 }
314
315 #endif
316
317 static inline struct packet_sock *pkt_sk(struct sock *sk)
318 {
319         return (struct packet_sock *)sk;
320 }
321
322 static void packet_sock_destruct(struct sock *sk)
323 {
324         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
325         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
326
327         if (!sock_flag(sk, SOCK_DEAD)) {
328                 pr_err("Attempt to release alive packet socket: %p\n", sk);
329                 return;
330         }
331
332         sk_refcnt_debug_dec(sk);
333 }
334
335
336 static const struct proto_ops packet_ops;
337
338 static const struct proto_ops packet_ops_spkt;
339
340 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
341                            struct packet_type *pt, struct net_device *orig_dev)
342 {
343         struct sock *sk;
344         struct sockaddr_pkt *spkt;
345
346         /*
347          *      When we registered the protocol we saved the socket in the data
348          *      field for just this event.
349          */
350
351         sk = pt->af_packet_priv;
352
353         /*
354          *      Yank back the headers [hope the device set this
355          *      right or kerboom...]
356          *
357          *      Incoming packets have ll header pulled,
358          *      push it back.
359          *
360          *      For outgoing ones skb->data == skb_mac_header(skb)
361          *      so that this procedure is noop.
362          */
363
364         if (skb->pkt_type == PACKET_LOOPBACK)
365                 goto out;
366
367         if (dev_net(dev) != sock_net(sk))
368                 goto out;
369
370         skb = skb_share_check(skb, GFP_ATOMIC);
371         if (skb == NULL)
372                 goto oom;
373
374         /* drop any routing info */
375         skb_dst_drop(skb);
376
377         /* drop conntrack reference */
378         nf_reset(skb);
379
380         spkt = &PACKET_SKB_CB(skb)->sa.pkt;
381
382         skb_push(skb, skb->data - skb_mac_header(skb));
383
384         /*
385          *      The SOCK_PACKET socket receives _all_ frames.
386          */
387
388         spkt->spkt_family = dev->type;
389         strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
390         spkt->spkt_protocol = skb->protocol;
391
392         /*
393          *      Charge the memory to the socket. This is done specifically
394          *      to prevent sockets using all the memory up.
395          */
396
397         if (sock_queue_rcv_skb(sk, skb) == 0)
398                 return 0;
399
400 out:
401         kfree_skb(skb);
402 oom:
403         return 0;
404 }
405
406
407 /*
408  *      Output a raw packet to a device layer. This bypasses all the other
409  *      protocol layers and you must therefore supply it with a complete frame
410  */
411
412 static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
413                                struct msghdr *msg, size_t len)
414 {
415         struct sock *sk = sock->sk;
416         struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
417         struct sk_buff *skb;
418         struct net_device *dev;
419         __be16 proto = 0;
420         int err;
421
422         /*
423          *      Get and verify the address.
424          */
425
426         if (saddr) {
427                 if (msg->msg_namelen < sizeof(struct sockaddr))
428                         return -EINVAL;
429                 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
430                         proto = saddr->spkt_protocol;
431         } else
432                 return -ENOTCONN;       /* SOCK_PACKET must be sent giving an address */
433
434         /*
435          *      Find the device first to size check it
436          */
437
438         saddr->spkt_device[13] = 0;
439         dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
440         err = -ENODEV;
441         if (dev == NULL)
442                 goto out_unlock;
443
444         err = -ENETDOWN;
445         if (!(dev->flags & IFF_UP))
446                 goto out_unlock;
447
448         /*
449          * You may not queue a frame bigger than the mtu. This is the lowest level
450          * raw protocol and you must do your own fragmentation at this level.
451          */
452
453         err = -EMSGSIZE;
454         if (len > dev->mtu + dev->hard_header_len)
455                 goto out_unlock;
456
457         err = -ENOBUFS;
458         skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
459
460         /*
461          * If the write buffer is full, then tough. At this level the user
462          * gets to deal with the problem - do your own algorithmic backoffs.
463          * That's far more flexible.
464          */
465
466         if (skb == NULL)
467                 goto out_unlock;
468
469         /*
470          *      Fill it in
471          */
472
473         /* FIXME: Save some space for broken drivers that write a
474          * hard header at transmission time by themselves. PPP is the
475          * notable one here. This should really be fixed at the driver level.
476          */
477         skb_reserve(skb, LL_RESERVED_SPACE(dev));
478         skb_reset_network_header(skb);
479
480         /* Try to align data part correctly */
481         if (dev->header_ops) {
482                 skb->data -= dev->hard_header_len;
483                 skb->tail -= dev->hard_header_len;
484                 if (len < dev->hard_header_len)
485                         skb_reset_network_header(skb);
486         }
487
488         /* Returns -EFAULT on error */
489         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
490         skb->protocol = proto;
491         skb->dev = dev;
492         skb->priority = sk->sk_priority;
493         if (err)
494                 goto out_free;
495
496         /*
497          *      Now send it
498          */
499
500         dev_queue_xmit(skb);
501         dev_put(dev);
502         return len;
503
504 out_free:
505         kfree_skb(skb);
506 out_unlock:
507         if (dev)
508                 dev_put(dev);
509         return err;
510 }
511
512 static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
513                                       unsigned int res)
514 {
515         struct sk_filter *filter;
516
517         rcu_read_lock_bh();
518         filter = rcu_dereference(sk->sk_filter);
519         if (filter != NULL)
520                 res = sk_run_filter(skb, filter->insns, filter->len);
521         rcu_read_unlock_bh();
522
523         return res;
524 }
525
526 /*
527    This function makes lazy skb cloning in hope that most of packets
528    are discarded by BPF.
529
530    Note tricky part: we DO mangle shared skb! skb->data, skb->len
531    and skb->cb are mangled. It works because (and until) packets
532    falling here are owned by current CPU. Output packets are cloned
533    by dev_queue_xmit_nit(), input packets are processed by net_bh
534    sequentially, so that if we return skb to original state on exit,
535    we will not harm anyone.
536  */
537
538 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
539                       struct packet_type *pt, struct net_device *orig_dev)
540 {
541         struct sock *sk;
542         struct sockaddr_ll *sll;
543         struct packet_sock *po;
544         u8 *skb_head = skb->data;
545         int skb_len = skb->len;
546         unsigned int snaplen, res;
547
548         if (skb->pkt_type == PACKET_LOOPBACK)
549                 goto drop;
550
551         sk = pt->af_packet_priv;
552         po = pkt_sk(sk);
553
554         if (dev_net(dev) != sock_net(sk))
555                 goto drop;
556
557         skb->dev = dev;
558
559         if (dev->header_ops) {
560                 /* The device has an explicit notion of ll header,
561                    exported to higher levels.
562
563                    Otherwise, the device hides datails of it frame
564                    structure, so that corresponding packet head
565                    never delivered to user.
566                  */
567                 if (sk->sk_type != SOCK_DGRAM)
568                         skb_push(skb, skb->data - skb_mac_header(skb));
569                 else if (skb->pkt_type == PACKET_OUTGOING) {
570                         /* Special case: outgoing packets have ll header at head */
571                         skb_pull(skb, skb_network_offset(skb));
572                 }
573         }
574
575         snaplen = skb->len;
576
577         res = run_filter(skb, sk, snaplen);
578         if (!res)
579                 goto drop_n_restore;
580         if (snaplen > res)
581                 snaplen = res;
582
583         if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
584             (unsigned)sk->sk_rcvbuf)
585                 goto drop_n_acct;
586
587         if (skb_shared(skb)) {
588                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
589                 if (nskb == NULL)
590                         goto drop_n_acct;
591
592                 if (skb_head != skb->data) {
593                         skb->data = skb_head;
594                         skb->len = skb_len;
595                 }
596                 kfree_skb(skb);
597                 skb = nskb;
598         }
599
600         BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
601                      sizeof(skb->cb));
602
603         sll = &PACKET_SKB_CB(skb)->sa.ll;
604         sll->sll_family = AF_PACKET;
605         sll->sll_hatype = dev->type;
606         sll->sll_protocol = skb->protocol;
607         sll->sll_pkttype = skb->pkt_type;
608         if (unlikely(po->origdev))
609                 sll->sll_ifindex = orig_dev->ifindex;
610         else
611                 sll->sll_ifindex = dev->ifindex;
612
613         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
614
615         PACKET_SKB_CB(skb)->origlen = skb->len;
616
617         if (pskb_trim(skb, snaplen))
618                 goto drop_n_acct;
619
620         skb_set_owner_r(skb, sk);
621         skb->dev = NULL;
622         skb_dst_drop(skb);
623
624         /* drop conntrack reference */
625         nf_reset(skb);
626
627         spin_lock(&sk->sk_receive_queue.lock);
628         po->stats.tp_packets++;
629         __skb_queue_tail(&sk->sk_receive_queue, skb);
630         spin_unlock(&sk->sk_receive_queue.lock);
631         sk->sk_data_ready(sk, skb->len);
632         return 0;
633
634 drop_n_acct:
635         spin_lock(&sk->sk_receive_queue.lock);
636         po->stats.tp_drops++;
637         spin_unlock(&sk->sk_receive_queue.lock);
638
639 drop_n_restore:
640         if (skb_head != skb->data && skb_shared(skb)) {
641                 skb->data = skb_head;
642                 skb->len = skb_len;
643         }
644 drop:
645         consume_skb(skb);
646         return 0;
647 }
648
649 #ifdef CONFIG_PACKET_MMAP
650 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
651                        struct packet_type *pt, struct net_device *orig_dev)
652 {
653         struct sock *sk;
654         struct packet_sock *po;
655         struct sockaddr_ll *sll;
656         union {
657                 struct tpacket_hdr *h1;
658                 struct tpacket2_hdr *h2;
659                 void *raw;
660         } h;
661         u8 *skb_head = skb->data;
662         int skb_len = skb->len;
663         unsigned int snaplen, res;
664         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
665         unsigned short macoff, netoff, hdrlen;
666         struct sk_buff *copy_skb = NULL;
667         struct timeval tv;
668         struct timespec ts;
669
670         if (skb->pkt_type == PACKET_LOOPBACK)
671                 goto drop;
672
673         sk = pt->af_packet_priv;
674         po = pkt_sk(sk);
675
676         if (dev_net(dev) != sock_net(sk))
677                 goto drop;
678
679         if (dev->header_ops) {
680                 if (sk->sk_type != SOCK_DGRAM)
681                         skb_push(skb, skb->data - skb_mac_header(skb));
682                 else if (skb->pkt_type == PACKET_OUTGOING) {
683                         /* Special case: outgoing packets have ll header at head */
684                         skb_pull(skb, skb_network_offset(skb));
685                 }
686         }
687
688         if (skb->ip_summed == CHECKSUM_PARTIAL)
689                 status |= TP_STATUS_CSUMNOTREADY;
690
691         snaplen = skb->len;
692
693         res = run_filter(skb, sk, snaplen);
694         if (!res)
695                 goto drop_n_restore;
696         if (snaplen > res)
697                 snaplen = res;
698
699         if (sk->sk_type == SOCK_DGRAM) {
700                 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
701                                   po->tp_reserve;
702         } else {
703                 unsigned maclen = skb_network_offset(skb);
704                 netoff = TPACKET_ALIGN(po->tp_hdrlen +
705                                        (maclen < 16 ? 16 : maclen)) +
706                         po->tp_reserve;
707                 macoff = netoff - maclen;
708         }
709
710         if (macoff + snaplen > po->rx_ring.frame_size) {
711                 if (po->copy_thresh &&
712                     atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
713                     (unsigned)sk->sk_rcvbuf) {
714                         if (skb_shared(skb)) {
715                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
716                         } else {
717                                 copy_skb = skb_get(skb);
718                                 skb_head = skb->data;
719                         }
720                         if (copy_skb)
721                                 skb_set_owner_r(copy_skb, sk);
722                 }
723                 snaplen = po->rx_ring.frame_size - macoff;
724                 if ((int)snaplen < 0)
725                         snaplen = 0;
726         }
727
728         spin_lock(&sk->sk_receive_queue.lock);
729         h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
730         if (!h.raw)
731                 goto ring_is_full;
732         packet_increment_head(&po->rx_ring);
733         po->stats.tp_packets++;
734         if (copy_skb) {
735                 status |= TP_STATUS_COPY;
736                 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
737         }
738         if (!po->stats.tp_drops)
739                 status &= ~TP_STATUS_LOSING;
740         spin_unlock(&sk->sk_receive_queue.lock);
741
742         skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
743
744         switch (po->tp_version) {
745         case TPACKET_V1:
746                 h.h1->tp_len = skb->len;
747                 h.h1->tp_snaplen = snaplen;
748                 h.h1->tp_mac = macoff;
749                 h.h1->tp_net = netoff;
750                 if (skb->tstamp.tv64)
751                         tv = ktime_to_timeval(skb->tstamp);
752                 else
753                         do_gettimeofday(&tv);
754                 h.h1->tp_sec = tv.tv_sec;
755                 h.h1->tp_usec = tv.tv_usec;
756                 hdrlen = sizeof(*h.h1);
757                 break;
758         case TPACKET_V2:
759                 h.h2->tp_len = skb->len;
760                 h.h2->tp_snaplen = snaplen;
761                 h.h2->tp_mac = macoff;
762                 h.h2->tp_net = netoff;
763                 if (skb->tstamp.tv64)
764                         ts = ktime_to_timespec(skb->tstamp);
765                 else
766                         getnstimeofday(&ts);
767                 h.h2->tp_sec = ts.tv_sec;
768                 h.h2->tp_nsec = ts.tv_nsec;
769                 h.h2->tp_vlan_tci = skb->vlan_tci;
770                 hdrlen = sizeof(*h.h2);
771                 break;
772         default:
773                 BUG();
774         }
775
776         sll = h.raw + TPACKET_ALIGN(hdrlen);
777         sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
778         sll->sll_family = AF_PACKET;
779         sll->sll_hatype = dev->type;
780         sll->sll_protocol = skb->protocol;
781         sll->sll_pkttype = skb->pkt_type;
782         if (unlikely(po->origdev))
783                 sll->sll_ifindex = orig_dev->ifindex;
784         else
785                 sll->sll_ifindex = dev->ifindex;
786
787         __packet_set_status(po, h.raw, status);
788         smp_mb();
789         {
790                 struct page *p_start, *p_end;
791                 u8 *h_end = h.raw + macoff + snaplen - 1;
792
793                 p_start = virt_to_page(h.raw);
794                 p_end = virt_to_page(h_end);
795                 while (p_start <= p_end) {
796                         flush_dcache_page(p_start);
797                         p_start++;
798                 }
799         }
800
801         sk->sk_data_ready(sk, 0);
802
803 drop_n_restore:
804         if (skb_head != skb->data && skb_shared(skb)) {
805                 skb->data = skb_head;
806                 skb->len = skb_len;
807         }
808 drop:
809         kfree_skb(skb);
810         return 0;
811
812 ring_is_full:
813         po->stats.tp_drops++;
814         spin_unlock(&sk->sk_receive_queue.lock);
815
816         sk->sk_data_ready(sk, 0);
817         kfree_skb(copy_skb);
818         goto drop_n_restore;
819 }
820
821 static void tpacket_destruct_skb(struct sk_buff *skb)
822 {
823         struct packet_sock *po = pkt_sk(skb->sk);
824         void *ph;
825
826         BUG_ON(skb == NULL);
827
828         if (likely(po->tx_ring.pg_vec)) {
829                 ph = skb_shinfo(skb)->destructor_arg;
830                 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
831                 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
832                 atomic_dec(&po->tx_ring.pending);
833                 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
834         }
835
836         sock_wfree(skb);
837 }
838
839 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
840                 void *frame, struct net_device *dev, int size_max,
841                 __be16 proto, unsigned char *addr)
842 {
843         union {
844                 struct tpacket_hdr *h1;
845                 struct tpacket2_hdr *h2;
846                 void *raw;
847         } ph;
848         int to_write, offset, len, tp_len, nr_frags, len_max;
849         struct socket *sock = po->sk.sk_socket;
850         struct page *page;
851         void *data;
852         int err;
853
854         ph.raw = frame;
855
856         skb->protocol = proto;
857         skb->dev = dev;
858         skb->priority = po->sk.sk_priority;
859         skb_shinfo(skb)->destructor_arg = ph.raw;
860
861         switch (po->tp_version) {
862         case TPACKET_V2:
863                 tp_len = ph.h2->tp_len;
864                 break;
865         default:
866                 tp_len = ph.h1->tp_len;
867                 break;
868         }
869         if (unlikely(tp_len > size_max)) {
870                 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
871                 return -EMSGSIZE;
872         }
873
874         skb_reserve(skb, LL_RESERVED_SPACE(dev));
875         skb_reset_network_header(skb);
876
877         data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
878         to_write = tp_len;
879
880         if (sock->type == SOCK_DGRAM) {
881                 err = dev_hard_header(skb, dev, ntohs(proto), addr,
882                                 NULL, tp_len);
883                 if (unlikely(err < 0))
884                         return -EINVAL;
885         } else if (dev->hard_header_len) {
886                 /* net device doesn't like empty head */
887                 if (unlikely(tp_len <= dev->hard_header_len)) {
888                         pr_err("packet size is too short (%d < %d)\n",
889                                tp_len, dev->hard_header_len);
890                         return -EINVAL;
891                 }
892
893                 skb_push(skb, dev->hard_header_len);
894                 err = skb_store_bits(skb, 0, data,
895                                 dev->hard_header_len);
896                 if (unlikely(err))
897                         return err;
898
899                 data += dev->hard_header_len;
900                 to_write -= dev->hard_header_len;
901         }
902
903         err = -EFAULT;
904         page = virt_to_page(data);
905         offset = offset_in_page(data);
906         len_max = PAGE_SIZE - offset;
907         len = ((to_write > len_max) ? len_max : to_write);
908
909         skb->data_len = to_write;
910         skb->len += to_write;
911         skb->truesize += to_write;
912         atomic_add(to_write, &po->sk.sk_wmem_alloc);
913
914         while (likely(to_write)) {
915                 nr_frags = skb_shinfo(skb)->nr_frags;
916
917                 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
918                         pr_err("Packet exceed the number of skb frags(%lu)\n",
919                                MAX_SKB_FRAGS);
920                         return -EFAULT;
921                 }
922
923                 flush_dcache_page(page);
924                 get_page(page);
925                 skb_fill_page_desc(skb,
926                                 nr_frags,
927                                 page++, offset, len);
928                 to_write -= len;
929                 offset = 0;
930                 len_max = PAGE_SIZE;
931                 len = ((to_write > len_max) ? len_max : to_write);
932         }
933
934         return tp_len;
935 }
936
/*
 *      Transmit the frames queued in the socket's TX ring.
 *
 *      The destination (interface, protocol, link-level address) is taken
 *      from msg->msg_name when present, otherwise from the socket's bound
 *      ifindex/num.  Each ring frame found in TP_STATUS_SEND_REQUEST state
 *      is wrapped in an skb by tpacket_fill_skb() and handed to
 *      dev_queue_xmit().
 *
 *      Returns the sum of the tp_len values of the frames queued, or a
 *      negative errno.  po->pg_vec_lock is held for the whole call and a
 *      reference on the device is held from lookup until exit.
 */
937 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
938 {
939         struct socket *sock;
940         struct sk_buff *skb;
941         struct net_device *dev;
942         __be16 proto;
943         int ifindex, err, reserve = 0;
944         void *ph;
945         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
946         int tp_len, size_max;
947         unsigned char *addr;
948         int len_sum = 0;
949         int status = 0;
950
        /* NOTE(review): sock is assigned here but never read in this function. */
951         sock = po->sk.sk_socket;
952
953         mutex_lock(&po->pg_vec_lock);
954
        /*
         * NOTE(review): this -EBUSY is overwritten (-EINVAL or -ENXIO)
         * before any exit path can return it.
         */
955         err = -EBUSY;
956         if (saddr == NULL) {
957                 ifindex = po->ifindex;
958                 proto   = po->num;
959                 addr    = NULL;
960         } else {
                /* The caller's address must cover sockaddr_ll and sll_halen bytes of sll_addr. */
961                 err = -EINVAL;
962                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
963                         goto out;
964                 if (msg->msg_namelen < (saddr->sll_halen
965                                         + offsetof(struct sockaddr_ll,
966                                                 sll_addr)))
967                         goto out;
968                 ifindex = saddr->sll_ifindex;
969                 proto   = saddr->sll_protocol;
970                 addr    = saddr->sll_addr;
971         }
972
973         dev = dev_get_by_index(sock_net(&po->sk), ifindex);
974         err = -ENXIO;
975         if (unlikely(dev == NULL))
976                 goto out;
977
978         reserve = dev->hard_header_len;
979
980         err = -ENETDOWN;
981         if (unlikely(!(dev->flags & IFF_UP)))
982                 goto out_put;
983
        /*
         * Largest packet accepted from a frame: the frame size minus the
         * per-frame overheads listed below, further capped to the device
         * MTU plus its link-level header length.
         */
984         size_max = po->tx_ring.frame_size
985                 - sizeof(struct skb_shared_info)
986                 - po->tp_hdrlen
987                 - LL_ALLOCATED_SPACE(dev)
988                 - sizeof(struct sockaddr_ll);
989
990         if (size_max > dev->mtu + reserve)
991                 size_max = dev->mtu + reserve;
992
993         do {
                /*
                 * presumably returns the frame at the ring head when it is in
                 * the requested state and NULL otherwise -- helper not visible
                 * here, confirm.
                 */
994                 ph = packet_current_frame(po, &po->tx_ring,
995                                 TP_STATUS_SEND_REQUEST);
996
                /* Nothing to send right now: yield, then let the loop condition decide. */
997                 if (unlikely(ph == NULL)) {
998                         schedule();
999                         continue;
1000                 }
1001
1002                 status = TP_STATUS_SEND_REQUEST;
1003                 skb = sock_alloc_send_skb(&po->sk,
1004                                 LL_ALLOCATED_SPACE(dev)
1005                                 + sizeof(struct sockaddr_ll),
1006                                 0, &err);
1007
                /* out_status then calls kfree_skb() with skb == NULL on this path. */
1008                 if (unlikely(skb == NULL))
1009                         goto out_status;
1010
1011                 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1012                                 addr);
1013
                /*
                 * Malformed frame: with tp_loss set the frame is handed back
                 * as AVAILABLE and skipped; otherwise it is marked
                 * WRONG_FORMAT and the error is returned to the caller.
                 */
1014                 if (unlikely(tp_len < 0)) {
1015                         if (po->tp_loss) {
1016                                 __packet_set_status(po, ph,
1017                                                 TP_STATUS_AVAILABLE);
1018                                 packet_increment_head(&po->tx_ring);
1019                                 kfree_skb(skb);
1020                                 continue;
1021                         } else {
1022                                 status = TP_STATUS_WRONG_FORMAT;
1023                                 err = tp_len;
1024                                 goto out_status;
1025                         }
1026                 }
1027
1028                 skb->destructor = tpacket_destruct_skb;
1029                 __packet_set_status(po, ph, TP_STATUS_SENDING);
1030                 atomic_inc(&po->tx_ring.pending);
1031
1032                 status = TP_STATUS_SEND_REQUEST;
1033                 err = dev_queue_xmit(skb);
1034                 if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0))
1035                         goto out_xmit;
1036                 packet_increment_head(&po->tx_ring);
1037                 len_sum += tp_len;
        /*
         * Keep going while a frame was just processed, or -- for blocking
         * callers -- while earlier frames are still pending completion.
         */
1038         } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT))
1039                                         && (atomic_read(&po->tx_ring.pending))))
1040               );
1041
1042         err = len_sum;
1043         goto out_put;
1044
        /*
         * NOTE(review): this path writes to and frees skb after it was
         * passed to dev_queue_xmit().  If dev_queue_xmit() has already
         * released the skb on the error return, this is a use-after-free
         * and a double decrement of tx_ring.pending -- confirm ownership
         * rules before relying on this path.
         */
1045 out_xmit:
1046         skb->destructor = sock_wfree;
1047         atomic_dec(&po->tx_ring.pending);
1048 out_status:
1049         __packet_set_status(po, ph, status);
1050         kfree_skb(skb);
1051 out_put:
1052         dev_put(dev);
1053 out:
1054         mutex_unlock(&po->pg_vec_lock);
1055         return err;
1056 }
1057 #endif
1058
1059 static int packet_snd(struct socket *sock,
1060                           struct msghdr *msg, size_t len)
1061 {
1062         struct sock *sk = sock->sk;
1063         struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
1064         struct sk_buff *skb;
1065         struct net_device *dev;
1066         __be16 proto;
1067         unsigned char *addr;
1068         int ifindex, err, reserve = 0;
1069
1070         /*
1071          *      Get and verify the address.
1072          */
1073
1074         if (saddr == NULL) {
1075                 struct packet_sock *po = pkt_sk(sk);
1076
1077                 ifindex = po->ifindex;
1078                 proto   = po->num;
1079                 addr    = NULL;
1080         } else {
1081                 err = -EINVAL;
1082                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1083                         goto out;
1084                 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1085                         goto out;
1086                 ifindex = saddr->sll_ifindex;
1087                 proto   = saddr->sll_protocol;
1088                 addr    = saddr->sll_addr;
1089         }
1090
1091
1092         dev = dev_get_by_index(sock_net(sk), ifindex);
1093         err = -ENXIO;
1094         if (dev == NULL)
1095                 goto out_unlock;
1096         if (sock->type == SOCK_RAW)
1097                 reserve = dev->hard_header_len;
1098
1099         err = -ENETDOWN;
1100         if (!(dev->flags & IFF_UP))
1101                 goto out_unlock;
1102
1103         err = -EMSGSIZE;
1104         if (len > dev->mtu+reserve)
1105                 goto out_unlock;
1106
1107         skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
1108                                 msg->msg_flags & MSG_DONTWAIT, &err);
1109         if (skb == NULL)
1110                 goto out_unlock;
1111
1112         skb_reserve(skb, LL_RESERVED_SPACE(dev));
1113         skb_reset_network_header(skb);
1114
1115         err = -EINVAL;
1116         if (sock->type == SOCK_DGRAM &&
1117             dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
1118                 goto out_free;
1119
1120         /* Returns -EFAULT on error */
1121         err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
1122         if (err)
1123                 goto out_free;
1124
1125         skb->protocol = proto;
1126         skb->dev = dev;
1127         skb->priority = sk->sk_priority;
1128
1129         /*
1130          *      Now send it
1131          */
1132
1133         err = dev_queue_xmit(skb);
1134         if (err > 0 && (err = net_xmit_errno(err)) != 0)
1135                 goto out_unlock;
1136
1137         dev_put(dev);
1138
1139         return len;
1140
1141 out_free:
1142         kfree_skb(skb);
1143 out_unlock:
1144         if (dev)
1145                 dev_put(dev);
1146 out:
1147         return err;
1148 }
1149
1150 static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1151                 struct msghdr *msg, size_t len)
1152 {
1153 #ifdef CONFIG_PACKET_MMAP
1154         struct sock *sk = sock->sk;
1155         struct packet_sock *po = pkt_sk(sk);
1156         if (po->tx_ring.pg_vec)
1157                 return tpacket_snd(po, msg);
1158         else
1159 #endif
1160                 return packet_snd(sock, msg, len);
1161 }
1162
1163 /*
1164  *      Close a PACKET socket. This is fairly simple. We immediately go
1165  *      to 'closed' state and remove our protocol entry in the device list.
1166  */
1167
1168 static int packet_release(struct socket *sock)
1169 {
1170         struct sock *sk = sock->sk;
1171         struct packet_sock *po;
1172         struct net *net;
1173 #ifdef CONFIG_PACKET_MMAP
1174         struct tpacket_req req;
1175 #endif
1176
1177         if (!sk)
1178                 return 0;
1179
1180         net = sock_net(sk);
1181         po = pkt_sk(sk);
1182
1183         write_lock_bh(&net->packet.sklist_lock);
1184         sk_del_node_init(sk);
1185         sock_prot_inuse_add(net, sk->sk_prot, -1);
1186         write_unlock_bh(&net->packet.sklist_lock);
1187
1188         /*
1189          *      Unhook packet receive handler.
1190          */
1191
1192         if (po->running) {
1193                 /*
1194                  *      Remove the protocol hook
1195                  */
1196                 dev_remove_pack(&po->prot_hook);
1197                 po->running = 0;
1198                 po->num = 0;
1199                 __sock_put(sk);
1200         }
1201
1202         packet_flush_mclist(sk);
1203
1204 #ifdef CONFIG_PACKET_MMAP
1205         memset(&req, 0, sizeof(req));
1206
1207         if (po->rx_ring.pg_vec)
1208                 packet_set_ring(sk, &req, 1, 0);
1209
1210         if (po->tx_ring.pg_vec)
1211                 packet_set_ring(sk, &req, 1, 1);
1212 #endif
1213
1214         /*
1215          *      Now the socket is dead. No more input will appear.
1216          */
1217
1218         sock_orphan(sk);
1219         sock->sk = NULL;
1220
1221         /* Purge queues */
1222
1223         skb_queue_purge(&sk->sk_receive_queue);
1224         sk_refcnt_debug_release(sk);
1225
1226         sock_put(sk);
1227         return 0;
1228 }
1229
1230 /*
1231  *      Attach a packet hook.
1232  */
1233
1234 static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1235 {
1236         struct packet_sock *po = pkt_sk(sk);
1237         /*
1238          *      Detach an existing hook if present.
1239          */
1240
1241         lock_sock(sk);
1242
1243         spin_lock(&po->bind_lock);
1244         if (po->running) {
1245                 __sock_put(sk);
1246                 po->running = 0;
1247                 po->num = 0;
1248                 spin_unlock(&po->bind_lock);
1249                 dev_remove_pack(&po->prot_hook);
1250                 spin_lock(&po->bind_lock);
1251         }
1252
1253         po->num = protocol;
1254         po->prot_hook.type = protocol;
1255         po->prot_hook.dev = dev;
1256
1257         po->ifindex = dev ? dev->ifindex : 0;
1258
1259         if (protocol == 0)
1260                 goto out_unlock;
1261
1262         if (!dev || (dev->flags & IFF_UP)) {
1263                 dev_add_pack(&po->prot_hook);
1264                 sock_hold(sk);
1265                 po->running = 1;
1266         } else {
1267                 sk->sk_err = ENETDOWN;
1268                 if (!sock_flag(sk, SOCK_DEAD))
1269                         sk->sk_error_report(sk);
1270         }
1271
1272 out_unlock:
1273         spin_unlock(&po->bind_lock);
1274         release_sock(sk);
1275         return 0;
1276 }
1277
1278 /*
1279  *      Bind a packet socket to a device
1280  */
1281
1282 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1283                             int addr_len)
1284 {
1285         struct sock *sk = sock->sk;
1286         char name[15];
1287         struct net_device *dev;
1288         int err = -ENODEV;
1289
1290         /*
1291          *      Check legality
1292          */
1293
1294         if (addr_len != sizeof(struct sockaddr))
1295                 return -EINVAL;
1296         strlcpy(name, uaddr->sa_data, sizeof(name));
1297
1298         dev = dev_get_by_name(sock_net(sk), name);
1299         if (dev) {
1300                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1301                 dev_put(dev);
1302         }
1303         return err;
1304 }
1305
1306 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1307 {
1308         struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1309         struct sock *sk = sock->sk;
1310         struct net_device *dev = NULL;
1311         int err;
1312
1313
1314         /*
1315          *      Check legality
1316          */
1317
1318         if (addr_len < sizeof(struct sockaddr_ll))
1319                 return -EINVAL;
1320         if (sll->sll_family != AF_PACKET)
1321                 return -EINVAL;
1322
1323         if (sll->sll_ifindex) {
1324                 err = -ENODEV;
1325                 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1326                 if (dev == NULL)
1327                         goto out;
1328         }
1329         err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1330         if (dev)
1331                 dev_put(dev);
1332
1333 out:
1334         return err;
1335 }
1336
1337 static struct proto packet_proto = {
1338         .name     = "PACKET",
1339         .owner    = THIS_MODULE,
1340         .obj_size = sizeof(struct packet_sock),
1341 };
1342
1343 /*
1344  *      Create a packet of type SOCK_PACKET.
1345  */
1346
1347 static int packet_create(struct net *net, struct socket *sock, int protocol)
1348 {
1349         struct sock *sk;
1350         struct packet_sock *po;
1351         __be16 proto = (__force __be16)protocol; /* weird, but documented */
1352         int err;
1353
1354         if (!capable(CAP_NET_RAW))
1355                 return -EPERM;
1356         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1357             sock->type != SOCK_PACKET)
1358                 return -ESOCKTNOSUPPORT;
1359
1360         sock->state = SS_UNCONNECTED;
1361
1362         err = -ENOBUFS;
1363         sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1364         if (sk == NULL)
1365                 goto out;
1366
1367         sock->ops = &packet_ops;
1368         if (sock->type == SOCK_PACKET)
1369                 sock->ops = &packet_ops_spkt;
1370
1371         sock_init_data(sock, sk);
1372
1373         po = pkt_sk(sk);
1374         sk->sk_family = PF_PACKET;
1375         po->num = proto;
1376
1377         sk->sk_destruct = packet_sock_destruct;
1378         sk_refcnt_debug_inc(sk);
1379
1380         /*
1381          *      Attach a protocol block
1382          */
1383
1384         spin_lock_init(&po->bind_lock);
1385         mutex_init(&po->pg_vec_lock);
1386         po->prot_hook.func = packet_rcv;
1387
1388         if (sock->type == SOCK_PACKET)
1389                 po->prot_hook.func = packet_rcv_spkt;
1390
1391         po->prot_hook.af_packet_priv = sk;
1392
1393         if (proto) {
1394                 po->prot_hook.type = proto;
1395                 dev_add_pack(&po->prot_hook);
1396                 sock_hold(sk);
1397                 po->running = 1;
1398         }
1399
1400         write_lock_bh(&net->packet.sklist_lock);
1401         sk_add_node(sk, &net->packet.sklist);
1402         sock_prot_inuse_add(net, &packet_proto, 1);
1403         write_unlock_bh(&net->packet.sklist_lock);
1404         return 0;
1405 out:
1406         return err;
1407 }
1408
1409 /*
1410  *      Pull a packet from our receive queue and hand it to the user.
1411  *      If necessary we block.
1412  */
1413
1414 static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1415                           struct msghdr *msg, size_t len, int flags)
1416 {
1417         struct sock *sk = sock->sk;
1418         struct sk_buff *skb;
1419         int copied, err;
1420         struct sockaddr_ll *sll;
1421
1422         err = -EINVAL;
1423         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1424                 goto out;
1425
1426 #if 0
1427         /* What error should we return now? EUNATTACH? */
1428         if (pkt_sk(sk)->ifindex < 0)
1429                 return -ENODEV;
1430 #endif
1431
1432         /*
1433          *      Call the generic datagram receiver. This handles all sorts
1434          *      of horrible races and re-entrancy so we can forget about it
1435          *      in the protocol layers.
1436          *
1437          *      Now it will return ENETDOWN, if device have just gone down,
1438          *      but then it will block.
1439          */
1440
1441         skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1442
1443         /*
1444          *      An error occurred so return it. Because skb_recv_datagram()
1445          *      handles the blocking we don't see and worry about blocking
1446          *      retries.
1447          */
1448
1449         if (skb == NULL)
1450                 goto out;
1451
1452         /*
1453          *      If the address length field is there to be filled in, we fill
1454          *      it in now.
1455          */
1456
1457         sll = &PACKET_SKB_CB(skb)->sa.ll;
1458         if (sock->type == SOCK_PACKET)
1459                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1460         else
1461                 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1462
1463         /*
1464          *      You lose any data beyond the buffer you gave. If it worries a
1465          *      user program they can ask the device for its MTU anyway.
1466          */
1467
1468         copied = skb->len;
1469         if (copied > len) {
1470                 copied = len;
1471                 msg->msg_flags |= MSG_TRUNC;
1472         }
1473
1474         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1475         if (err)
1476                 goto out_free;
1477
1478         sock_recv_timestamp(msg, sk, skb);
1479
1480         if (msg->msg_name)
1481                 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1482                        msg->msg_namelen);
1483
1484         if (pkt_sk(sk)->auxdata) {
1485                 struct tpacket_auxdata aux;
1486
1487                 aux.tp_status = TP_STATUS_USER;
1488                 if (skb->ip_summed == CHECKSUM_PARTIAL)
1489                         aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1490                 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1491                 aux.tp_snaplen = skb->len;
1492                 aux.tp_mac = 0;
1493                 aux.tp_net = skb_network_offset(skb);
1494                 aux.tp_vlan_tci = skb->vlan_tci;
1495
1496                 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1497         }
1498
1499         /*
1500          *      Free or return the buffer as appropriate. Again this
1501          *      hides all the races and re-entrancy issues from us.
1502          */
1503         err = (flags&MSG_TRUNC) ? skb->len : copied;
1504
1505 out_free:
1506         skb_free_datagram(sk, skb);
1507 out:
1508         return err;
1509 }
1510
1511 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1512                                int *uaddr_len, int peer)
1513 {
1514         struct net_device *dev;
1515         struct sock *sk = sock->sk;
1516
1517         if (peer)
1518                 return -EOPNOTSUPP;
1519
1520         uaddr->sa_family = AF_PACKET;
1521         dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
1522         if (dev) {
1523                 strlcpy(uaddr->sa_data, dev->name, 15);
1524                 dev_put(dev);
1525         } else
1526                 memset(uaddr->sa_data, 0, 14);
1527         *uaddr_len = sizeof(*uaddr);
1528
1529         return 0;
1530 }
1531
1532 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1533                           int *uaddr_len, int peer)
1534 {
1535         struct net_device *dev;
1536         struct sock *sk = sock->sk;
1537         struct packet_sock *po = pkt_sk(sk);
1538         struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1539
1540         if (peer)
1541                 return -EOPNOTSUPP;
1542
1543         sll->sll_family = AF_PACKET;
1544         sll->sll_ifindex = po->ifindex;
1545         sll->sll_protocol = po->num;
1546         dev = dev_get_by_index(sock_net(sk), po->ifindex);
1547         if (dev) {
1548                 sll->sll_hatype = dev->type;
1549                 sll->sll_halen = dev->addr_len;
1550                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1551                 dev_put(dev);
1552         } else {
1553                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1554                 sll->sll_halen = 0;
1555         }
1556         *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1557
1558         return 0;
1559 }
1560
1561 static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1562                          int what)
1563 {
1564         switch (i->type) {
1565         case PACKET_MR_MULTICAST:
1566                 if (what > 0)
1567                         return dev_mc_add(dev, i->addr, i->alen, 0);
1568                 else
1569                         return dev_mc_delete(dev, i->addr, i->alen, 0);
1570                 break;
1571         case PACKET_MR_PROMISC:
1572                 return dev_set_promiscuity(dev, what);
1573                 break;
1574         case PACKET_MR_ALLMULTI:
1575                 return dev_set_allmulti(dev, what);
1576                 break;
1577         case PACKET_MR_UNICAST:
1578                 if (what > 0)
1579                         return dev_unicast_add(dev, i->addr);
1580                 else
1581                         return dev_unicast_delete(dev, i->addr);
1582                 break;
1583         default:
1584                 break;
1585         }
1586         return 0;
1587 }
1588
1589 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1590 {
1591         for ( ; i; i = i->next) {
1592                 if (i->ifindex == dev->ifindex)
1593                         packet_dev_mc(dev, i, what);
1594         }
1595 }
1596
1597 static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1598 {
1599         struct packet_sock *po = pkt_sk(sk);
1600         struct packet_mclist *ml, *i;
1601         struct net_device *dev;
1602         int err;
1603
1604         rtnl_lock();
1605
1606         err = -ENODEV;
1607         dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1608         if (!dev)
1609                 goto done;
1610
1611         err = -EINVAL;
1612         if (mreq->mr_alen > dev->addr_len)
1613                 goto done;
1614
1615         err = -ENOBUFS;
1616         i = kmalloc(sizeof(*i), GFP_KERNEL);
1617         if (i == NULL)
1618                 goto done;
1619
1620         err = 0;
1621         for (ml = po->mclist; ml; ml = ml->next) {
1622                 if (ml->ifindex == mreq->mr_ifindex &&
1623                     ml->type == mreq->mr_type &&
1624                     ml->alen == mreq->mr_alen &&
1625                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1626                         ml->count++;
1627                         /* Free the new element ... */
1628                         kfree(i);
1629                         goto done;
1630                 }
1631         }
1632
1633         i->type = mreq->mr_type;
1634         i->ifindex = mreq->mr_ifindex;
1635         i->alen = mreq->mr_alen;
1636         memcpy(i->addr, mreq->mr_address, i->alen);
1637         i->count = 1;
1638         i->next = po->mclist;
1639         po->mclist = i;
1640         err = packet_dev_mc(dev, i, 1);
1641         if (err) {
1642                 po->mclist = i->next;
1643                 kfree(i);
1644         }
1645
1646 done:
1647         rtnl_unlock();
1648         return err;
1649 }
1650
1651 static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1652 {
1653         struct packet_mclist *ml, **mlp;
1654
1655         rtnl_lock();
1656
1657         for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1658                 if (ml->ifindex == mreq->mr_ifindex &&
1659                     ml->type == mreq->mr_type &&
1660                     ml->alen == mreq->mr_alen &&
1661                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1662                         if (--ml->count == 0) {
1663                                 struct net_device *dev;
1664                                 *mlp = ml->next;
1665                                 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
1666                                 if (dev) {
1667                                         packet_dev_mc(dev, ml, -1);
1668                                         dev_put(dev);
1669                                 }
1670                                 kfree(ml);
1671                         }
1672                         rtnl_unlock();
1673                         return 0;
1674                 }
1675         }
1676         rtnl_unlock();
1677         return -EADDRNOTAVAIL;
1678 }
1679
1680 static void packet_flush_mclist(struct sock *sk)
1681 {
1682         struct packet_sock *po = pkt_sk(sk);
1683         struct packet_mclist *ml;
1684
1685         if (!po->mclist)
1686                 return;
1687
1688         rtnl_lock();
1689         while ((ml = po->mclist) != NULL) {
1690                 struct net_device *dev;
1691
1692                 po->mclist = ml->next;
1693                 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
1694                 if (dev != NULL) {
1695                         packet_dev_mc(dev, ml, -1);
1696                         dev_put(dev);
1697                 }
1698                 kfree(ml);
1699         }
1700         rtnl_unlock();
1701 }
1702
1703 static int
1704 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1705 {
1706         struct sock *sk = sock->sk;
1707         struct packet_sock *po = pkt_sk(sk);
1708         int ret;
1709
1710         if (level != SOL_PACKET)
1711                 return -ENOPROTOOPT;
1712
1713         switch (optname) {
1714         case PACKET_ADD_MEMBERSHIP:
1715         case PACKET_DROP_MEMBERSHIP:
1716         {
1717                 struct packet_mreq_max mreq;
1718                 int len = optlen;
1719                 memset(&mreq, 0, sizeof(mreq));
1720                 if (len < sizeof(struct packet_mreq))
1721                         return -EINVAL;
1722                 if (len > sizeof(mreq))
1723                         len = sizeof(mreq);
1724                 if (copy_from_user(&mreq, optval, len))
1725                         return -EFAULT;
1726                 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1727                         return -EINVAL;
1728                 if (optname == PACKET_ADD_MEMBERSHIP)
1729                         ret = packet_mc_add(sk, &mreq);
1730                 else
1731                         ret = packet_mc_drop(sk, &mreq);
1732                 return ret;
1733         }
1734
1735 #ifdef CONFIG_PACKET_MMAP
1736         case PACKET_RX_RING:
1737         case PACKET_TX_RING:
1738         {
1739                 struct tpacket_req req;
1740
1741                 if (optlen < sizeof(req))
1742                         return -EINVAL;
1743                 if (copy_from_user(&req, optval, sizeof(req)))
1744                         return -EFAULT;
1745                 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1746         }
1747         case PACKET_COPY_THRESH:
1748         {
1749                 int val;
1750
1751                 if (optlen != sizeof(val))
1752                         return -EINVAL;
1753                 if (copy_from_user(&val, optval, sizeof(val)))
1754                         return -EFAULT;
1755
1756                 pkt_sk(sk)->copy_thresh = val;
1757                 return 0;
1758         }
1759         case PACKET_VERSION:
1760         {
1761                 int val;
1762
1763                 if (optlen != sizeof(val))
1764                         return -EINVAL;
1765                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1766                         return -EBUSY;
1767                 if (copy_from_user(&val, optval, sizeof(val)))
1768                         return -EFAULT;
1769                 switch (val) {
1770                 case TPACKET_V1:
1771                 case TPACKET_V2:
1772                         po->tp_version = val;
1773                         return 0;
1774                 default:
1775                         return -EINVAL;
1776                 }
1777         }
1778         case PACKET_RESERVE:
1779         {
1780                 unsigned int val;
1781
1782                 if (optlen != sizeof(val))
1783                         return -EINVAL;
1784                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1785                         return -EBUSY;
1786                 if (copy_from_user(&val, optval, sizeof(val)))
1787                         return -EFAULT;
1788                 po->tp_reserve = val;
1789                 return 0;
1790         }
1791         case PACKET_LOSS:
1792         {
1793                 unsigned int val;
1794
1795                 if (optlen != sizeof(val))
1796                         return -EINVAL;
1797                 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1798                         return -EBUSY;
1799                 if (copy_from_user(&val, optval, sizeof(val)))
1800                         return -EFAULT;
1801                 po->tp_loss = !!val;
1802                 return 0;
1803         }
1804 #endif
1805         case PACKET_AUXDATA:
1806         {
1807                 int val;
1808
1809                 if (optlen < sizeof(val))
1810                         return -EINVAL;
1811                 if (copy_from_user(&val, optval, sizeof(val)))
1812                         return -EFAULT;
1813
1814                 po->auxdata = !!val;
1815                 return 0;
1816         }
1817         case PACKET_ORIGDEV:
1818         {
1819                 int val;
1820
1821                 if (optlen < sizeof(val))
1822                         return -EINVAL;
1823                 if (copy_from_user(&val, optval, sizeof(val)))
1824                         return -EFAULT;
1825
1826                 po->origdev = !!val;
1827                 return 0;
1828         }
1829         default:
1830                 return -ENOPROTOOPT;
1831         }
1832 }
1833
1834 static int packet_getsockopt(struct socket *sock, int level, int optname,
1835                              char __user *optval, int __user *optlen)
1836 {
1837         int len;
1838         int val;
1839         struct sock *sk = sock->sk;
1840         struct packet_sock *po = pkt_sk(sk);
1841         void *data;
1842         struct tpacket_stats st;
1843
1844         if (level != SOL_PACKET)
1845                 return -ENOPROTOOPT;
1846
1847         if (get_user(len, optlen))
1848                 return -EFAULT;
1849
1850         if (len < 0)
1851                 return -EINVAL;
1852
1853         switch (optname) {
1854         case PACKET_STATISTICS:
1855                 if (len > sizeof(struct tpacket_stats))
1856                         len = sizeof(struct tpacket_stats);
1857                 spin_lock_bh(&sk->sk_receive_queue.lock);
1858                 st = po->stats;
1859                 memset(&po->stats, 0, sizeof(st));
1860                 spin_unlock_bh(&sk->sk_receive_queue.lock);
1861                 st.tp_packets += st.tp_drops;
1862
1863                 data = &st;
1864                 break;
1865         case PACKET_AUXDATA:
1866                 if (len > sizeof(int))
1867                         len = sizeof(int);
1868                 val = po->auxdata;
1869
1870                 data = &val;
1871                 break;
1872         case PACKET_ORIGDEV:
1873                 if (len > sizeof(int))
1874                         len = sizeof(int);
1875                 val = po->origdev;
1876
1877                 data = &val;
1878                 break;
1879 #ifdef CONFIG_PACKET_MMAP
1880         case PACKET_VERSION:
1881                 if (len > sizeof(int))
1882                         len = sizeof(int);
1883                 val = po->tp_version;
1884                 data = &val;
1885                 break;
1886         case PACKET_HDRLEN:
1887                 if (len > sizeof(int))
1888                         len = sizeof(int);
1889                 if (copy_from_user(&val, optval, len))
1890                         return -EFAULT;
1891                 switch (val) {
1892                 case TPACKET_V1:
1893                         val = sizeof(struct tpacket_hdr);
1894                         break;
1895                 case TPACKET_V2:
1896                         val = sizeof(struct tpacket2_hdr);
1897                         break;
1898                 default:
1899                         return -EINVAL;
1900                 }
1901                 data = &val;
1902                 break;
1903         case PACKET_RESERVE:
1904                 if (len > sizeof(unsigned int))
1905                         len = sizeof(unsigned int);
1906                 val = po->tp_reserve;
1907                 data = &val;
1908                 break;
1909         case PACKET_LOSS:
1910                 if (len > sizeof(unsigned int))
1911                         len = sizeof(unsigned int);
1912                 val = po->tp_loss;
1913                 data = &val;
1914                 break;
1915 #endif
1916         default:
1917                 return -ENOPROTOOPT;
1918         }
1919
1920         if (put_user(len, optlen))
1921                 return -EFAULT;
1922         if (copy_to_user(optval, data, len))
1923                 return -EFAULT;
1924         return 0;
1925 }
1926
1927
1928 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1929 {
1930         struct sock *sk;
1931         struct hlist_node *node;
1932         struct net_device *dev = data;
1933         struct net *net = dev_net(dev);
1934
1935         read_lock(&net->packet.sklist_lock);
1936         sk_for_each(sk, node, &net->packet.sklist) {
1937                 struct packet_sock *po = pkt_sk(sk);
1938
1939                 switch (msg) {
1940                 case NETDEV_UNREGISTER:
1941                         if (po->mclist)
1942                                 packet_dev_mclist(dev, po->mclist, -1);
1943                         /* fallthrough */
1944
1945                 case NETDEV_DOWN:
1946                         if (dev->ifindex == po->ifindex) {
1947                                 spin_lock(&po->bind_lock);
1948                                 if (po->running) {
1949                                         __dev_remove_pack(&po->prot_hook);
1950                                         __sock_put(sk);
1951                                         po->running = 0;
1952                                         sk->sk_err = ENETDOWN;
1953                                         if (!sock_flag(sk, SOCK_DEAD))
1954                                                 sk->sk_error_report(sk);
1955                                 }
1956                                 if (msg == NETDEV_UNREGISTER) {
1957                                         po->ifindex = -1;
1958                                         po->prot_hook.dev = NULL;
1959                                 }
1960                                 spin_unlock(&po->bind_lock);
1961                         }
1962                         break;
1963                 case NETDEV_UP:
1964                         spin_lock(&po->bind_lock);
1965                         if (dev->ifindex == po->ifindex && po->num &&
1966                             !po->running) {
1967                                 dev_add_pack(&po->prot_hook);
1968                                 sock_hold(sk);
1969                                 po->running = 1;
1970                         }
1971                         spin_unlock(&po->bind_lock);
1972                         break;
1973                 }
1974         }
1975         read_unlock(&net->packet.sklist_lock);
1976         return NOTIFY_DONE;
1977 }
1978
1979
1980 static int packet_ioctl(struct socket *sock, unsigned int cmd,
1981                         unsigned long arg)
1982 {
1983         struct sock *sk = sock->sk;
1984
1985         switch (cmd) {
1986         case SIOCOUTQ:
1987         {
1988                 int amount = sk_wmem_alloc_get(sk);
1989
1990                 return put_user(amount, (int __user *)arg);
1991         }
1992         case SIOCINQ:
1993         {
1994                 struct sk_buff *skb;
1995                 int amount = 0;
1996
1997                 spin_lock_bh(&sk->sk_receive_queue.lock);
1998                 skb = skb_peek(&sk->sk_receive_queue);
1999                 if (skb)
2000                         amount = skb->len;
2001                 spin_unlock_bh(&sk->sk_receive_queue.lock);
2002                 return put_user(amount, (int __user *)arg);
2003         }
2004         case SIOCGSTAMP:
2005                 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2006         case SIOCGSTAMPNS:
2007                 return sock_get_timestampns(sk, (struct timespec __user *)arg);
2008
2009 #ifdef CONFIG_INET
2010         case SIOCADDRT:
2011         case SIOCDELRT:
2012         case SIOCDARP:
2013         case SIOCGARP:
2014         case SIOCSARP:
2015         case SIOCGIFADDR:
2016         case SIOCSIFADDR:
2017         case SIOCGIFBRDADDR:
2018         case SIOCSIFBRDADDR:
2019         case SIOCGIFNETMASK:
2020         case SIOCSIFNETMASK:
2021         case SIOCGIFDSTADDR:
2022         case SIOCSIFDSTADDR:
2023         case SIOCSIFFLAGS:
2024                 if (!net_eq(sock_net(sk), &init_net))
2025                         return -ENOIOCTLCMD;
2026                 return inet_dgram_ops.ioctl(sock, cmd, arg);
2027 #endif
2028
2029         default:
2030                 return -ENOIOCTLCMD;
2031         }
2032         return 0;
2033 }
2034
2035 #ifndef CONFIG_PACKET_MMAP
2036 #define packet_mmap sock_no_mmap
2037 #define packet_poll datagram_poll
2038 #else
2039
2040 static unsigned int packet_poll(struct file *file, struct socket *sock,
2041                                 poll_table *wait)
2042 {
2043         struct sock *sk = sock->sk;
2044         struct packet_sock *po = pkt_sk(sk);
2045         unsigned int mask = datagram_poll(file, sock, wait);
2046
2047         spin_lock_bh(&sk->sk_receive_queue.lock);
2048         if (po->rx_ring.pg_vec) {
2049                 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2050                         mask |= POLLIN | POLLRDNORM;
2051         }
2052         spin_unlock_bh(&sk->sk_receive_queue.lock);
2053         spin_lock_bh(&sk->sk_write_queue.lock);
2054         if (po->tx_ring.pg_vec) {
2055                 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2056                         mask |= POLLOUT | POLLWRNORM;
2057         }
2058         spin_unlock_bh(&sk->sk_write_queue.lock);
2059         return mask;
2060 }
2061
2062
/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */
2066
2067 static void packet_mm_open(struct vm_area_struct *vma)
2068 {
2069         struct file *file = vma->vm_file;
2070         struct socket *sock = file->private_data;
2071         struct sock *sk = sock->sk;
2072
2073         if (sk)
2074                 atomic_inc(&pkt_sk(sk)->mapped);
2075 }
2076
2077 static void packet_mm_close(struct vm_area_struct *vma)
2078 {
2079         struct file *file = vma->vm_file;
2080         struct socket *sock = file->private_data;
2081         struct sock *sk = sock->sk;
2082
2083         if (sk)
2084                 atomic_dec(&pkt_sk(sk)->mapped);
2085 }
2086
2087 static struct vm_operations_struct packet_mmap_ops = {
2088         .open   =       packet_mm_open,
2089         .close  =       packet_mm_close,
2090 };
2091
/*
 * free_pg_vec - release a block vector built by alloc_pg_vec().
 * @pg_vec: array of @len block addresses; NULL slots are skipped
 * @order:  page order each block was allocated with
 * @len:    number of slots in @pg_vec
 *
 * Frees every non-NULL block and then the array itself.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int i;	/* same type as @len: no signed/unsigned compare */

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}
2102
2103 static inline char *alloc_one_pg_vec_page(unsigned long order)
2104 {
2105         gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2106
2107         return (char *) __get_free_pages(gfp_flags, order);
2108 }
2109
2110 static char **alloc_pg_vec(struct tpacket_req *req, int order)
2111 {
2112         unsigned int block_nr = req->tp_block_nr;
2113         char **pg_vec;
2114         int i;
2115
2116         pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2117         if (unlikely(!pg_vec))
2118                 goto out;
2119
2120         for (i = 0; i < block_nr; i++) {
2121                 pg_vec[i] = alloc_one_pg_vec_page(order);
2122                 if (unlikely(!pg_vec[i]))
2123                         goto out_free_pgvec;
2124         }
2125
2126 out:
2127         return pg_vec;
2128
2129 out_free_pgvec:
2130         free_pg_vec(pg_vec, order, block_nr);
2131         pg_vec = NULL;
2132         goto out;
2133 }
2134
2135 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2136                 int closing, int tx_ring)
2137 {
2138         char **pg_vec = NULL;
2139         struct packet_sock *po = pkt_sk(sk);
2140         int was_running, order = 0;
2141         struct packet_ring_buffer *rb;
2142         struct sk_buff_head *rb_queue;
2143         __be16 num;
2144         int err;
2145
2146         rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2147         rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2148
2149         err = -EBUSY;
2150         if (!closing) {
2151                 if (atomic_read(&po->mapped))
2152                         goto out;
2153                 if (atomic_read(&rb->pending))
2154                         goto out;
2155         }
2156
2157         if (req->tp_block_nr) {
2158                 /* Sanity tests and some calculations */
2159                 err = -EBUSY;
2160                 if (unlikely(rb->pg_vec))
2161                         goto out;
2162
2163                 switch (po->tp_version) {
2164                 case TPACKET_V1:
2165                         po->tp_hdrlen = TPACKET_HDRLEN;
2166                         break;
2167                 case TPACKET_V2:
2168                         po->tp_hdrlen = TPACKET2_HDRLEN;
2169                         break;
2170                 }
2171
2172                 err = -EINVAL;
2173                 if (unlikely((int)req->tp_block_size <= 0))
2174                         goto out;
2175                 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
2176                         goto out;
2177                 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
2178                                         po->tp_reserve))
2179                         goto out;
2180                 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
2181                         goto out;
2182
2183                 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2184                 if (unlikely(rb->frames_per_block <= 0))
2185                         goto out;
2186                 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2187                                         req->tp_frame_nr))
2188                         goto out;
2189
2190                 err = -ENOMEM;
2191                 order = get_order(req->tp_block_size);
2192                 pg_vec = alloc_pg_vec(req, order);
2193                 if (unlikely(!pg_vec))
2194                         goto out;
2195         }
2196         /* Done */
2197         else {
2198                 err = -EINVAL;
2199                 if (unlikely(req->tp_frame_nr))
2200                         goto out;
2201         }
2202
2203         lock_sock(sk);
2204
2205         /* Detach socket from network */
2206         spin_lock(&po->bind_lock);
2207         was_running = po->running;
2208         num = po->num;
2209         if (was_running) {
2210                 __dev_remove_pack(&po->prot_hook);
2211                 po->num = 0;
2212                 po->running = 0;
2213                 __sock_put(sk);
2214         }
2215         spin_unlock(&po->bind_lock);
2216
2217         synchronize_net();
2218
2219         err = -EBUSY;
2220         mutex_lock(&po->pg_vec_lock);
2221         if (closing || atomic_read(&po->mapped) == 0) {
2222                 err = 0;
2223 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
2224                 spin_lock_bh(&rb_queue->lock);
2225                 pg_vec = XC(rb->pg_vec, pg_vec);
2226                 rb->frame_max = (req->tp_frame_nr - 1);
2227                 rb->head = 0;
2228                 rb->frame_size = req->tp_frame_size;
2229                 spin_unlock_bh(&rb_queue->lock);
2230
2231                 order = XC(rb->pg_vec_order, order);
2232                 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2233
2234                 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2235                 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2236                                                 tpacket_rcv : packet_rcv;
2237                 skb_queue_purge(rb_queue);
2238 #undef XC
2239                 if (atomic_read(&po->mapped))
2240                         pr_err("packet_mmap: vma is busy: %d\n",
2241                                atomic_read(&po->mapped));
2242         }
2243         mutex_unlock(&po->pg_vec_lock);
2244
2245         spin_lock(&po->bind_lock);
2246         if (was_running && !po->running) {
2247                 sock_hold(sk);
2248                 po->running = 1;
2249                 po->num = num;
2250                 dev_add_pack(&po->prot_hook);
2251         }
2252         spin_unlock(&po->bind_lock);
2253
2254         release_sock(sk);
2255
2256         if (pg_vec)
2257                 free_pg_vec(pg_vec, order, req->tp_block_nr);
2258 out:
2259         return err;
2260 }
2261
2262 static int packet_mmap(struct file *file, struct socket *sock,
2263                 struct vm_area_struct *vma)
2264 {
2265         struct sock *sk = sock->sk;
2266         struct packet_sock *po = pkt_sk(sk);
2267         unsigned long size, expected_size;
2268         struct packet_ring_buffer *rb;
2269         unsigned long start;
2270         int err = -EINVAL;
2271         int i;
2272
2273         if (vma->vm_pgoff)
2274                 return -EINVAL;
2275
2276         mutex_lock(&po->pg_vec_lock);
2277
2278         expected_size = 0;
2279         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2280                 if (rb->pg_vec) {
2281                         expected_size += rb->pg_vec_len
2282                                                 * rb->pg_vec_pages
2283                                                 * PAGE_SIZE;
2284                 }
2285         }
2286
2287         if (expected_size == 0)
2288                 goto out;
2289
2290         size = vma->vm_end - vma->vm_start;
2291         if (size != expected_size)
2292                 goto out;
2293
2294         start = vma->vm_start;
2295         for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2296                 if (rb->pg_vec == NULL)
2297                         continue;
2298
2299                 for (i = 0; i < rb->pg_vec_len; i++) {
2300                         struct page *page = virt_to_page(rb->pg_vec[i]);
2301                         int pg_num;
2302
2303                         for (pg_num = 0; pg_num < rb->pg_vec_pages;
2304                                         pg_num++, page++) {
2305                                 err = vm_insert_page(vma, start, page);
2306                                 if (unlikely(err))
2307                                         goto out;
2308                                 start += PAGE_SIZE;
2309                         }
2310                 }
2311         }
2312
2313         atomic_inc(&po->mapped);
2314         vma->vm_ops = &packet_mmap_ops;
2315         err = 0;
2316
2317 out:
2318         mutex_unlock(&po->pg_vec_lock);
2319         return err;
2320 }
2321 #endif
2322
2323
2324 static const struct proto_ops packet_ops_spkt = {
2325         .family =       PF_PACKET,
2326         .owner =        THIS_MODULE,
2327         .release =      packet_release,
2328         .bind =         packet_bind_spkt,
2329         .connect =      sock_no_connect,
2330         .socketpair =   sock_no_socketpair,
2331         .accept =       sock_no_accept,
2332         .getname =      packet_getname_spkt,
2333         .poll =         datagram_poll,
2334         .ioctl =        packet_ioctl,
2335         .listen =       sock_no_listen,
2336         .shutdown =     sock_no_shutdown,
2337         .setsockopt =   sock_no_setsockopt,
2338         .getsockopt =   sock_no_getsockopt,
2339         .sendmsg =      packet_sendmsg_spkt,
2340         .recvmsg =      packet_recvmsg,
2341         .mmap =         sock_no_mmap,
2342         .sendpage =     sock_no_sendpage,
2343 };
2344
2345 static const struct proto_ops packet_ops = {
2346         .family =       PF_PACKET,
2347         .owner =        THIS_MODULE,
2348         .release =      packet_release,
2349         .bind =         packet_bind,
2350         .connect =      sock_no_connect,
2351         .socketpair =   sock_no_socketpair,
2352         .accept =       sock_no_accept,
2353         .getname =      packet_getname,
2354         .poll =         packet_poll,
2355         .ioctl =        packet_ioctl,
2356         .listen =       sock_no_listen,
2357         .shutdown =     sock_no_shutdown,
2358         .setsockopt =   packet_setsockopt,
2359         .getsockopt =   packet_getsockopt,
2360         .sendmsg =      packet_sendmsg,
2361         .recvmsg =      packet_recvmsg,
2362         .mmap =         packet_mmap,
2363         .sendpage =     sock_no_sendpage,
2364 };
2365
2366 static struct net_proto_family packet_family_ops = {
2367         .family =       PF_PACKET,
2368         .create =       packet_create,
2369         .owner  =       THIS_MODULE,
2370 };
2371
2372 static struct notifier_block packet_netdev_notifier = {
2373         .notifier_call =        packet_notifier,
2374 };
2375
2376 #ifdef CONFIG_PROC_FS
2377 static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2378 {
2379         struct sock *s;
2380         struct hlist_node *node;
2381
2382         sk_for_each(s, node, &net->packet.sklist) {
2383                 if (!off--)
2384                         return s;
2385         }
2386         return NULL;
2387 }
2388
2389 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2390         __acquires(seq_file_net(seq)->packet.sklist_lock)
2391 {
2392         struct net *net = seq_file_net(seq);
2393         read_lock(&net->packet.sklist_lock);
2394         return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
2395 }
2396
2397 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2398 {
2399         struct net *net = seq_file_net(seq);
2400         ++*pos;
2401         return  (v == SEQ_START_TOKEN)
2402                 ? sk_head(&net->packet.sklist)
2403                 : sk_next((struct sock *)v) ;
2404 }
2405
2406 static void packet_seq_stop(struct seq_file *seq, void *v)
2407         __releases(seq_file_net(seq)->packet.sklist_lock)
2408 {
2409         struct net *net = seq_file_net(seq);
2410         read_unlock(&net->packet.sklist_lock);
2411 }
2412
2413 static int packet_seq_show(struct seq_file *seq, void *v)
2414 {
2415         if (v == SEQ_START_TOKEN)
2416                 seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
2417         else {
2418                 struct sock *s = v;
2419                 const struct packet_sock *po = pkt_sk(s);
2420
2421                 seq_printf(seq,
2422                            "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
2423                            s,
2424                            atomic_read(&s->sk_refcnt),
2425                            s->sk_type,
2426                            ntohs(po->num),
2427                            po->ifindex,
2428                            po->running,
2429                            atomic_read(&s->sk_rmem_alloc),
2430                            sock_i_uid(s),
2431                            sock_i_ino(s));
2432         }
2433
2434         return 0;
2435 }
2436
2437 static const struct seq_operations packet_seq_ops = {
2438         .start  = packet_seq_start,
2439         .next   = packet_seq_next,
2440         .stop   = packet_seq_stop,
2441         .show   = packet_seq_show,
2442 };
2443
2444 static int packet_seq_open(struct inode *inode, struct file *file)
2445 {
2446         return seq_open_net(inode, file, &packet_seq_ops,
2447                             sizeof(struct seq_net_private));
2448 }
2449
2450 static const struct file_operations packet_seq_fops = {
2451         .owner          = THIS_MODULE,
2452         .open           = packet_seq_open,
2453         .read           = seq_read,
2454         .llseek         = seq_lseek,
2455         .release        = seq_release_net,
2456 };
2457
2458 #endif
2459
2460 static int packet_net_init(struct net *net)
2461 {
2462         rwlock_init(&net->packet.sklist_lock);
2463         INIT_HLIST_HEAD(&net->packet.sklist);
2464
2465         if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2466                 return -ENOMEM;
2467
2468         return 0;
2469 }
2470
/* Per-namespace teardown: remove the "packet" proc entry. */
static void packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2475
2476 static struct pernet_operations packet_net_ops = {
2477         .init = packet_net_init,
2478         .exit = packet_net_exit,
2479 };
2480
2481
2482 static void __exit packet_exit(void)
2483 {
2484         unregister_netdevice_notifier(&packet_netdev_notifier);
2485         unregister_pernet_subsys(&packet_net_ops);
2486         sock_unregister(PF_PACKET);
2487         proto_unregister(&packet_proto);
2488 }
2489
2490 static int __init packet_init(void)
2491 {
2492         int rc = proto_register(&packet_proto, 0);
2493
2494         if (rc != 0)
2495                 goto out;
2496
2497         sock_register(&packet_family_ops);
2498         register_pernet_subsys(&packet_net_ops);
2499         register_netdevice_notifier(&packet_netdev_notifier);
2500 out:
2501         return rc;
2502 }
2503
2504 module_init(packet_init);
2505 module_exit(packet_exit);
2506 MODULE_LICENSE("GPL");
2507 MODULE_ALIAS_NETPROTO(PF_PACKET);